github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/profiles.go (about) 1 package phlaredb 2 3 import ( 4 "context" 5 "fmt" 6 "sort" 7 "sync" 8 9 "github.com/gogo/status" 10 "github.com/opentracing/opentracing-go" 11 "github.com/pkg/errors" 12 "github.com/prometheus/common/model" 13 "github.com/prometheus/prometheus/model/labels" 14 "github.com/prometheus/prometheus/promql/parser" 15 "github.com/prometheus/prometheus/storage" 16 "github.com/samber/lo" 17 "go.uber.org/atomic" 18 "google.golang.org/grpc/codes" 19 20 ingestv1 "github.com/grafana/pyroscope/api/gen/proto/go/ingester/v1" 21 "github.com/grafana/pyroscope/pkg/iter" 22 phlaremodel "github.com/grafana/pyroscope/pkg/model" 23 "github.com/grafana/pyroscope/pkg/phlaredb/query" 24 schemav1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" 25 "github.com/grafana/pyroscope/pkg/phlaredb/tsdb" 26 "github.com/grafana/pyroscope/pkg/phlaredb/tsdb/index" 27 ) 28 29 // delta encoding for ranges 30 type rowRange struct { 31 rowNum int64 32 length int 33 } 34 35 type rowRangeWithSeriesIndex struct { 36 *rowRange 37 seriesIndex uint32 38 } 39 40 // those need to be strictly ordered 41 type rowRangesWithSeriesIndex []rowRangeWithSeriesIndex 42 43 // getSeriesIndex returns the series index for a given row number. 44 // searchHint is the hint for the index to start searching from, it should be passed to next call of this function. 45 func (s rowRangesWithSeriesIndex) getSeriesIndex(rowNum int64, searchHint *int) uint32 { 46 if *searchHint < 0 || *searchHint >= len(s) { 47 *searchHint = 0 48 } 49 for i := *searchHint; i < len(s); i++ { 50 rg := s[i] 51 // it is possible that the series is not existing 52 if rg.rowRange == nil { 53 continue 54 } 55 if rg.rowNum <= rowNum && rg.rowNum+int64(rg.length) > rowNum { 56 *searchHint = i 57 return rg.seriesIndex 58 } 59 } 60 panic("series index not found") 61 } 62 63 type rowRanges map[rowRange]model.Fingerprint 64 65 func (rR rowRanges) iter() iter.Iterator[fingerprintWithRowNum] { 66 // ensure row ranges is sorted 67 rRSlice := lo.Keys(rR) 68 sort.Slice(rRSlice, func(i, j int) bool { 69 return rRSlice[i].rowNum < rRSlice[j].rowNum 70 }) 71 72 fps := make([]model.Fingerprint, 0, len(rR)) 73 for _, elem := range rRSlice { 74 fps = append(fps, rR[elem]) 75 } 76 77 return &rowRangesIter{ 78 r: rRSlice, 79 fps: fps, 80 pos: 0, 81 } 82 } 83 84 type fingerprintWithRowNum struct { 85 fp model.Fingerprint 86 rowNum int64 87 } 88 89 func (f fingerprintWithRowNum) RowNumber() int64 { 90 return f.rowNum 91 } 92 93 func (r rowRanges) fingerprintsWithRowNum() query.Iterator { 94 return query.NewRowNumberIterator(r.iter()) 95 } 96 97 type rowRangesIter struct { 98 r []rowRange 99 fps []model.Fingerprint 100 pos int64 101 } 102 103 func (i *rowRangesIter) At() fingerprintWithRowNum { 104 return fingerprintWithRowNum{ 105 rowNum: i.pos - 1, 106 fp: i.fps[0], 107 } 108 } 109 110 func (i *rowRangesIter) Next() bool { 111 if len(i.r) == 0 { 112 return false 113 } 114 if i.pos < i.r[0].rowNum { 115 i.pos = i.r[0].rowNum 116 } 117 118 if i.pos >= i.r[0].rowNum+int64(i.r[0].length) { 119 i.r = i.r[1:] 120 i.fps = i.fps[1:] 121 return i.Next() 122 } 123 i.pos++ 124 return true 125 } 126 127 func (i *rowRangesIter) Close() error { return nil } 128 129 func (i *rowRangesIter) Err() error { return nil } 130 131 type profileSeries struct { 132 lbs phlaremodel.Labels 133 fp model.Fingerprint 134 135 minTime, maxTime int64 136 137 // profiles in memory 138 profiles []*schemav1.InMemoryProfile 139 140 // profiles temporary stored on disk in row group segements 141 // TODO: this information is crucial to recover segements to a full block later 142 profilesOnDisk []*rowRange 143 } 144 145 type profilesIndex struct { 146 ix *tsdb.BitPrefixInvertedIndex 147 // todo: like the inverted index we might want to shard fingerprint to avoid contentions. 148 profilesPerFP map[model.Fingerprint]*profileSeries 149 mutex sync.RWMutex 150 totalProfiles *atomic.Int64 151 totalSeries *atomic.Int64 152 rowGroupsOnDisk int 153 154 metrics *headMetrics 155 } 156 157 func newProfileIndex(totalShards uint32, metrics *headMetrics) (*profilesIndex, error) { 158 ix, err := tsdb.NewBitPrefixWithShards(totalShards) 159 if err != nil { 160 return nil, err 161 } 162 return &profilesIndex{ 163 ix: ix, 164 profilesPerFP: make(map[model.Fingerprint]*profileSeries), 165 totalProfiles: atomic.NewInt64(0), 166 totalSeries: atomic.NewInt64(0), 167 metrics: metrics, 168 }, nil 169 } 170 171 // Add a new set of profile to the index. 172 // The seriesRef are expected to match the profile labels passed in. 173 func (pi *profilesIndex) Add(ps *schemav1.InMemoryProfile, lbs phlaremodel.Labels, profileName string) { 174 pi.mutex.Lock() 175 defer pi.mutex.Unlock() 176 profiles, ok := pi.profilesPerFP[ps.SeriesFingerprint] 177 if !ok { 178 lbs := pi.ix.Add(lbs, ps.SeriesFingerprint) 179 profiles = &profileSeries{ 180 lbs: lbs, 181 fp: ps.SeriesFingerprint, 182 minTime: ps.TimeNanos, 183 maxTime: ps.TimeNanos, 184 profilesOnDisk: make([]*rowRange, pi.rowGroupsOnDisk), 185 } 186 pi.profilesPerFP[ps.SeriesFingerprint] = profiles 187 pi.metrics.series.Set(float64(pi.totalSeries.Inc())) 188 pi.metrics.seriesCreated.WithLabelValues(profileName).Inc() 189 } 190 191 // profile is latest in this series, use a shortcut 192 if ps.TimeNanos > profiles.maxTime { 193 // update max timeNanos 194 profiles.maxTime = ps.TimeNanos 195 196 // add profile to in memory slice 197 profiles.profiles = append(profiles.profiles, ps) 198 } else { 199 // use binary search to find position 200 i := sort.Search(len(profiles.profiles), func(i int) bool { 201 return profiles.profiles[i].TimeNanos > ps.TimeNanos 202 }) 203 204 // insert into slice at correct position 205 profiles.profiles = append(profiles.profiles, &schemav1.InMemoryProfile{}) 206 copy(profiles.profiles[i+1:], profiles.profiles[i:]) 207 profiles.profiles[i] = ps 208 } 209 210 if ps.TimeNanos < profiles.minTime { 211 profiles.minTime = ps.TimeNanos 212 } 213 214 pi.metrics.profiles.Set(float64(pi.totalProfiles.Inc())) 215 pi.metrics.profilesCreated.WithLabelValues(profileName).Inc() 216 } 217 218 func (pi *profilesIndex) selectMatchingFPs(ctx context.Context, params *ingestv1.SelectProfilesRequest) ([]model.Fingerprint, error) { 219 sp, _ := opentracing.StartSpanFromContext(ctx, "selectMatchingFPs - Index") 220 defer sp.Finish() 221 selectors, err := parser.ParseMetricSelector(params.LabelSelector) 222 if err != nil { 223 return nil, status.Error(codes.InvalidArgument, "failed to parse label selectors: "+err.Error()) 224 } 225 if params.Type == nil { 226 return nil, errors.New("no profileType given") 227 } 228 selectors = append(selectors, phlaremodel.SelectorFromProfileType(params.Type)) 229 230 filters, matchers := SplitFiltersAndMatchers(selectors) 231 ids, err := pi.ix.Lookup(matchers, nil) 232 if err != nil { 233 return nil, err 234 } 235 236 pi.mutex.RLock() 237 defer pi.mutex.RUnlock() 238 239 // filter fingerprints that no longer exist or don't match the filters 240 var idx int 241 outer: 242 for _, fp := range ids { 243 profile, ok := pi.profilesPerFP[fp] 244 if !ok { 245 // If a profile labels is missing here, it has already been flushed 246 // and is supposed to be picked up from storage by querier 247 continue 248 } 249 for _, filter := range filters { 250 if !filter.Matches(profile.lbs.Get(filter.Name)) { 251 continue outer 252 } 253 } 254 255 // keep this one 256 ids[idx] = fp 257 idx++ 258 } 259 260 sp.SetTag("matchedSeries", idx) 261 262 return ids[:idx], nil 263 } 264 265 func (pi *profilesIndex) selectMatchingRowRanges(ctx context.Context, params *ingestv1.SelectProfilesRequest, rowGroupIdx int) ( 266 query.Iterator, 267 map[model.Fingerprint]phlaremodel.Labels, 268 error, 269 ) { 270 sp, ctx := opentracing.StartSpanFromContext(ctx, "selectMatchingRowRanges - Index") 271 defer sp.Finish() 272 273 ids, err := pi.selectMatchingFPs(ctx, params) 274 if err != nil { 275 return nil, nil, err 276 } 277 278 // gather rowRanges and labels from matching series under read lock of the index 279 var ( 280 rowRanges = make(rowRanges, len(ids)) 281 labelsPerFP = make(map[model.Fingerprint]phlaremodel.Labels, len(ids)) 282 ) 283 284 pi.mutex.RLock() 285 defer pi.mutex.RUnlock() 286 287 for _, fp := range ids { 288 // skip if series no longer in index 289 profileSeries, ok := pi.profilesPerFP[fp] 290 if !ok { 291 continue 292 } 293 294 labelsPerFP[fp] = profileSeries.lbs 295 296 // skip if rowRange empty 297 rR := profileSeries.profilesOnDisk[rowGroupIdx] 298 if rR == nil { 299 continue 300 } 301 302 rowRanges[*rR] = fp 303 } 304 305 sp.SetTag("rowGroupSegment", rowGroupIdx) 306 sp.SetTag("matchedRowRangesCount", len(rowRanges)) 307 308 return rowRanges.fingerprintsWithRowNum(), labelsPerFP, nil 309 } 310 311 type ProfileWithLabels struct { 312 profile *schemav1.InMemoryProfile 313 lbs phlaremodel.Labels 314 fp model.Fingerprint 315 } 316 317 func (p ProfileWithLabels) RowNumber() int64 { return 0 } 318 319 func (p ProfileWithLabels) StacktracePartition() uint64 { 320 return p.profile.StacktracePartition 321 } 322 323 func (p ProfileWithLabels) Timestamp() model.Time { 324 return model.TimeFromUnixNano(p.profile.TimeNanos) 325 } 326 327 func (p ProfileWithLabels) Fingerprint() model.Fingerprint { 328 return p.fp 329 } 330 331 func (p ProfileWithLabels) Labels() phlaremodel.Labels { 332 return p.lbs 333 } 334 335 func (p ProfileWithLabels) Samples() schemav1.Samples { 336 return p.profile.Samples 337 } 338 339 func (p ProfileWithLabels) Total() int64 { 340 return int64(p.profile.TotalValue) 341 } 342 343 func (p ProfileWithLabels) Annotations() schemav1.Annotations { 344 return p.profile.Annotations 345 } 346 347 type SeriesIterator struct { 348 iter.Iterator[*schemav1.InMemoryProfile] 349 curr ProfileWithLabels 350 fp model.Fingerprint 351 lbs phlaremodel.Labels 352 } 353 354 func NewSeriesIterator(labels phlaremodel.Labels, fingerprint model.Fingerprint, it iter.Iterator[*schemav1.InMemoryProfile]) *SeriesIterator { 355 return &SeriesIterator{ 356 Iterator: it, 357 fp: fingerprint, 358 lbs: labels, 359 } 360 } 361 362 func (it *SeriesIterator) Next() bool { 363 if !it.Iterator.Next() { 364 return false 365 } 366 it.curr = ProfileWithLabels{ 367 profile: it.Iterator.At(), 368 lbs: it.lbs, 369 fp: it.fp, 370 } 371 return true 372 } 373 374 func (it *SeriesIterator) At() Profile { 375 return it.curr 376 } 377 378 // forMatchingLabels iterates through all matching label sets and calls f for each labels set. 379 func (pi *profilesIndex) forMatchingLabels(matchers []*labels.Matcher, 380 fn func(lbs phlaremodel.Labels, fp model.Fingerprint) error, 381 ) error { 382 filters, matchers := SplitFiltersAndMatchers(matchers) 383 ids, err := pi.ix.Lookup(matchers, nil) 384 if err != nil { 385 return err 386 } 387 388 pi.mutex.RLock() 389 defer pi.mutex.RUnlock() 390 391 outer: 392 for _, fp := range ids { 393 profile, ok := pi.profilesPerFP[fp] 394 if !ok { 395 // If a profile labels is missing here, it has already been flushed 396 // and is supposed to be picked up from storage by querier 397 continue 398 } 399 for _, filter := range filters { 400 if !filter.Matches(profile.lbs.Get(filter.Name)) { 401 continue outer 402 } 403 } 404 if err := fn(profile.lbs, fp); err != nil { 405 return err 406 } 407 } 408 return nil 409 } 410 411 // WriteTo writes the profiles tsdb index to the specified filepath. 412 func (pi *profilesIndex) writeTo(ctx context.Context, path string) ([][]rowRangeWithSeriesIndex, error) { 413 writer, err := index.NewWriter(ctx, path) 414 if err != nil { 415 return nil, err 416 } 417 pi.mutex.RLock() 418 defer pi.mutex.RUnlock() 419 420 pfs := make([]*profileSeries, 0, len(pi.profilesPerFP)) 421 422 for _, p := range pi.profilesPerFP { 423 pfs = append(pfs, p) 424 } 425 426 // sort by fp 427 sort.Slice(pfs, func(i, j int) bool { 428 return phlaremodel.CompareLabelPairs(pfs[i].lbs, pfs[j].lbs) < 0 429 }) 430 431 symbolsMap := make(map[string]struct{}) 432 for _, s := range pfs { 433 for _, l := range s.lbs { 434 symbolsMap[l.Name] = struct{}{} 435 symbolsMap[l.Value] = struct{}{} 436 } 437 } 438 439 // Sort symbols 440 symbols := make([]string, 0, len(symbolsMap)) 441 for s := range symbolsMap { 442 symbols = append(symbols, s) 443 } 444 sort.Strings(symbols) 445 446 // Add symbols 447 for _, symbol := range symbols { 448 if err := writer.AddSymbol(symbol); err != nil { 449 return nil, err 450 } 451 } 452 453 // ranges per row group 454 rangesPerRG := make([][]rowRangeWithSeriesIndex, len(pfs[0].profilesOnDisk)) 455 456 // Add series 457 for i, s := range pfs { 458 if err := writer.AddSeries(storage.SeriesRef(i), s.lbs, s.fp, index.ChunkMeta{ 459 MinTime: s.minTime, 460 MaxTime: s.maxTime, 461 // We store the series Index from the head with the series to use when retrieving data from parquet. 462 SeriesIndex: uint32(i), 463 }); err != nil { 464 return nil, err 465 } 466 // store series index 467 for idx, rg := range s.profilesOnDisk { 468 rangesPerRG[idx] = append(rangesPerRG[idx], rowRangeWithSeriesIndex{rowRange: rg, seriesIndex: uint32(i)}) 469 } 470 } 471 472 return rangesPerRG, writer.Close() 473 } 474 475 func (pi *profilesIndex) cutRowGroup(rgProfiles []schemav1.InMemoryProfile) error { 476 pi.mutex.Lock() 477 defer pi.mutex.Unlock() 478 479 // adding rowGroup and rowNum information per fingerprint 480 rowRangePerFP := make(map[model.Fingerprint]*rowRange, len(pi.profilesPerFP)) 481 countPerFP := make(map[model.Fingerprint]int, len(pi.profilesPerFP)) 482 for rowNum, p := range rgProfiles { 483 countPerFP[p.SeriesFingerprint]++ 484 if _, ok := rowRangePerFP[p.SeriesFingerprint]; !ok { 485 rowRangePerFP[p.SeriesFingerprint] = &rowRange{ 486 rowNum: int64(rowNum), 487 } 488 } 489 490 rowRange := rowRangePerFP[p.SeriesFingerprint] 491 rowRange.length++ 492 493 // sanity check 494 if (int(rowRange.rowNum) + rowRange.length - 1) != rowNum { 495 return fmt.Errorf("rowRange is not matching up, ensure that the ordering of the profile row group is ordered correctly, current row_num=%d, expect range %d-%d", rowNum, rowRange.rowNum, int(rowRange.rowNum)+rowRange.length) 496 } 497 } 498 499 pi.rowGroupsOnDisk += 1 500 501 for fp, ps := range pi.profilesPerFP { 502 count := countPerFP[fp] 503 // empty all in memory profiles 504 for i := range ps.profiles[:count] { 505 // Allow GC to evict the object. 506 ps.profiles[i] = nil 507 } 508 ps.profiles = ps.profiles[count:] 509 510 // attach rowGroup and rowNum information 511 rowRange := rowRangePerFP[ps.fp] 512 513 ps.profilesOnDisk = append( 514 ps.profilesOnDisk, 515 rowRange, 516 ) 517 518 } 519 520 return nil 521 } 522 523 // SplitFiltersAndMatchers splits empty matchers off, which are treated as filters, see #220 524 func SplitFiltersAndMatchers(allMatchers []*labels.Matcher) (filters, matchers []*labels.Matcher) { 525 for _, matcher := range allMatchers { 526 // If a matcher matches "", we need to fetch possible chunks where 527 // there is no value and will therefore not be in our label index. 528 // e.g. {foo=""} and {foo!="bar"} both match "", so we need to return 529 // chunks which do not have a foo label set. When looking entries in 530 // the index, we should ignore this matcher to fetch all possible chunks 531 // and then filter on the matcher after the chunks have been fetched. 532 if matcher.Matches("") { 533 filters = append(filters, matcher) 534 } else { 535 matchers = append(matchers, matcher) 536 } 537 } 538 return 539 }