package phlaredb

import (
	"context"
	"fmt"
	_ "net/http/pprof"
	"os"
	"path/filepath"
	"sort"
	"testing"
	"time"

	"github.com/go-kit/log"
	"github.com/oklog/ulid/v2"
	"github.com/parquet-go/parquet-go"
	"github.com/prometheus/common/model"
	"github.com/prometheus/prometheus/storage"
	"github.com/samber/lo"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"

	ingesterv1 "github.com/grafana/pyroscope/api/gen/proto/go/ingester/v1"
	typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1"
	phlaremodel "github.com/grafana/pyroscope/pkg/model"
	"github.com/grafana/pyroscope/pkg/objstore/client"
	"github.com/grafana/pyroscope/pkg/objstore/providers/filesystem"
	"github.com/grafana/pyroscope/pkg/phlaredb/block"
	"github.com/grafana/pyroscope/pkg/phlaredb/sharding"
	"github.com/grafana/pyroscope/pkg/phlaredb/symdb"
	"github.com/grafana/pyroscope/pkg/phlaredb/tsdb/index"
	"github.com/grafana/pyroscope/pkg/pprof/testhelper"
	phlarecontext "github.com/grafana/pyroscope/pkg/pyroscope/context"
)

// TestCompact compacts four references to the SAME source block and verifies
// that duplicate rows are removed: the output stats equal the three unique
// profiles/series, and both label-merged series and merged stacktraces match
// the original data. The profile for job "b" carries an annotation, which
// must survive compaction and surface on its point (with key "throttled").
func TestCompact(t *testing.T) {
	ctx := context.Background()
	b := newBlock(t, func() []*testhelper.ProfileBuilder {
		return []*testhelper.ProfileBuilder{
			testhelper.NewProfileBuilder(int64(time.Second*1)).
				CPUProfile().
				WithLabels(
					"job", "a",
				).ForStacktraceString("foo", "bar", "baz").AddSamples(1),
			testhelper.NewProfileBuilder(int64(time.Second*2)).
				CPUProfile().
				WithLabels(
					"job", "b",
				).
				WithAnnotations("test annotation").
				ForStacktraceString("foo", "bar", "baz").AddSamples(1),
			testhelper.NewProfileBuilder(int64(time.Second*3)).
				CPUProfile().
				WithLabels(
					"job", "c",
				).ForStacktraceString("foo", "bar", "baz").AddSamples(1),
		}
	})
	dst := t.TempDir()
	// The same block four times over: compaction must deduplicate down to the
	// three distinct profiles built above.
	compacted, err := Compact(ctx, []BlockReader{b, b, b, b}, dst)
	require.NoError(t, err)
	require.Equal(t, uint64(3), compacted.Stats.NumProfiles)
	require.Equal(t, uint64(3), compacted.Stats.NumSamples)
	require.Equal(t, uint64(3), compacted.Stats.NumSeries)
	require.Equal(t, model.TimeFromUnix(1), compacted.MinTime)
	require.Equal(t, model.TimeFromUnix(3), compacted.MaxTime)
	querier := blockQuerierFromMeta(t, dst, compacted)

	matchAll := &ingesterv1.SelectProfilesRequest{
		LabelSelector: "{}",
		Type:          mustParseProfileSelector(t, "process_cpu:cpu:nanoseconds:cpu:nanoseconds"),
		Start:         0,
		End:           40000,
	}
	it, err := querier.SelectMatchingProfiles(ctx, matchAll)
	require.NoError(t, err)
	series, err := querier.MergeByLabels(ctx, it, nil, "job")
	require.NoError(t, err)
	require.Equal(t, []*typesv1.Series{
		{
			Labels: phlaremodel.LabelsFromStrings("job", "a"),
			Points: []*typesv1.Point{{Value: float64(1), Timestamp: int64(1000), Annotations: []*typesv1.ProfileAnnotation{}}},
		},
		{
			Labels: phlaremodel.LabelsFromStrings("job", "b"),
			Points: []*typesv1.Point{
				// The annotation attached via WithAnnotations is expected back
				// on the point, keyed "throttled".
				{Value: float64(1), Timestamp: int64(2000), Annotations: []*typesv1.ProfileAnnotation{
					{Key: "throttled", Value: "test annotation"},
				}},
			},
		},
		{
			Labels: phlaremodel.LabelsFromStrings("job", "c"),
			Points: []*typesv1.Point{{Value: float64(1), Timestamp: int64(3000), Annotations: []*typesv1.ProfileAnnotation{}}},
		},
	}, series)

	it, err = querier.SelectMatchingProfiles(ctx, matchAll)
	require.NoError(t, err)
	res, err := querier.MergeByStacktraces(ctx, it, 0)
	require.NoError(t, err)
	require.NotNil(t, res)

	// All three profiles share the same stack, so the merged tree holds a
	// single foo>bar>baz path with value 3.
	expected := new(phlaremodel.Tree)
	expected.InsertStack(3, "baz", "bar", "foo")
	require.Equal(t, expected.String(), res.String())
}

// TestCompactWithDownsampling compacts a level-2 block (the level at which
// the test enables downsampled outputs) and verifies that the result carries
// the 5m/1h aggregated profile tables alongside the raw table, that queries
// still return the original series and stacktraces, and that the querier's
// per-table access metric was populated for each table.
func TestCompactWithDownsampling(t *testing.T) {
	ctx := context.Background()
	b := newBlock(t, func() []*testhelper.ProfileBuilder {
		return []*testhelper.ProfileBuilder{
			testhelper.NewProfileBuilder(int64(time.Hour-time.Minute)).
				CPUProfile().
				WithLabels(
					"job", "a",
				).ForStacktraceString("foo", "bar", "baz").AddSamples(1),
			testhelper.NewProfileBuilder(int64(time.Hour+time.Minute)).
				CPUProfile().
				WithLabels(
					"job", "b",
				).WithAnnotations("test annotation").
				ForStacktraceString("foo", "bar", "baz").AddSamples(1),
			testhelper.NewProfileBuilder(int64(time.Hour+6*time.Minute)).
				CPUProfile().
				WithLabels(
					"job", "c",
				).ForStacktraceString("foo", "bar", "baz").AddSamples(1),
		}
	})
	dst := t.TempDir()
	// Mark the input as already compacted once so this pass produces the
	// downsampled tables asserted below.
	b.meta.Compaction.Level = 2
	compacted, err := Compact(ctx, []BlockReader{b, b, b, b}, dst)
	require.NoError(t, err)
	require.Equal(t, uint64(3), compacted.Stats.NumProfiles)
	require.Equal(t, uint64(3), compacted.Stats.NumSamples)
	require.Equal(t, uint64(3), compacted.Stats.NumSeries)
	require.Equal(t, model.Time((time.Hour - time.Minute).Milliseconds()), compacted.MinTime)
	require.Equal(t, model.Time((time.Hour + 6*time.Minute).Milliseconds()), compacted.MaxTime)

	// Both downsampled tables must exist in the block meta and be non-empty.
	for _, f := range []*block.File{
		compacted.FileByRelPath("profiles_5m_sum.parquet"),
		compacted.FileByRelPath("profiles_1h_sum.parquet"),
	} {
		require.NotNil(t, f)
		assert.NotZero(t, f.SizeBytes)
	}

	querier := blockQuerierFromMeta(t, dst, compacted)
	matchAll := &ingesterv1.SelectProfilesRequest{
		LabelSelector: "{}",
		Type:          mustParseProfileSelector(t, "process_cpu:cpu:nanoseconds:cpu:nanoseconds"),
		Start:         0,
		End:           (time.Hour + 7*time.Minute - time.Millisecond).Milliseconds(),
	}
	it, err := querier.SelectMatchingProfiles(ctx, matchAll)
	require.NoError(t, err)
	series, err := querier.MergeByLabels(ctx, it, nil, "job")
	require.NoError(t, err)
	require.Equal(t, []*typesv1.Series{
		{
			Labels: phlaremodel.LabelsFromStrings("job", "a"),
			Points: []*typesv1.Point{{Value: float64(1), Timestamp: (time.Hour - time.Minute).Milliseconds(), Annotations: []*typesv1.ProfileAnnotation{}}},
		},
		{
			Labels: phlaremodel.LabelsFromStrings("job", "b"),
			Points: []*typesv1.Point{{Value: float64(1), Timestamp: (time.Hour + time.Minute).Milliseconds(), Annotations: []*typesv1.ProfileAnnotation{{Key: "throttled", Value: "test annotation"}}}},
		},
		{
			Labels: phlaremodel.LabelsFromStrings("job", "c"),
			Points: []*typesv1.Point{{Value: float64(1), Timestamp: (time.Hour + 6*time.Minute).Milliseconds(), Annotations: []*typesv1.ProfileAnnotation{}}},
		},
	}, series)

	it, err = querier.SelectMatchingProfiles(ctx, matchAll)
	require.NoError(t, err)
	res, err := querier.MergeByStacktraces(ctx, it, 0)
	require.NoError(t, err)
	require.NotNil(t, res)

	expected := new(phlaremodel.Tree)
	expected.InsertStack(3, "baz", "bar", "foo")
	require.Equal(t, expected.String(), res.String())

	res, err = querier.SelectMergeByStacktraces(ctx, matchAll, 0)
	require.NoError(t, err)
	require.NotNil(t, res)
	require.Equal(t, expected.String(), res.String())
	// DeleteLabelValues reports whether a child metric existed for that label
	// value, so these checks prove which parquet tables were actually touched
	// by the queries above (and that no unlabeled counter was created).
	assert.False(t, querier.metrics.profileTableAccess.DeleteLabelValues(""))
	assert.True(t, querier.metrics.profileTableAccess.DeleteLabelValues("profiles_5m_sum.parquet"))
	assert.True(t, querier.metrics.profileTableAccess.DeleteLabelValues("profiles_1h_sum.parquet"))
	assert.True(t, querier.metrics.profileTableAccess.DeleteLabelValues("profiles.parquet"))
}

// TestCompactWithSplitting compacts two overlapping blocks (passed twice each)
// with fingerprint-based splitting into 16 shards and verifies: the shard
// assignment of the four distinct series, that all series/timestamps are
// queryable across the resulting blocks, that an individual shard only serves
// its own series, and that stacktrace merging still resolves correctly.
func TestCompactWithSplitting(t *testing.T) {
	ctx := context.Background()

	b1 := newBlock(t, func() []*testhelper.ProfileBuilder {
		return append(
			profileSeriesGenerator(t, time.Unix(1, 0), time.Unix(10, 0), time.Second, "job", "a"),
			profileSeriesGenerator(t, time.Unix(11, 0), time.Unix(20, 0), time.Second, "job", "b")...,
		)
	})
	// b2 repeats b1's series (a, b) and adds two more (c, d), so the inputs
	// overlap and deduplication is exercised together with splitting.
	b2 := newBlock(t, func() []*testhelper.ProfileBuilder {
		return append(
			append(
				append(
					profileSeriesGenerator(t, time.Unix(1, 0), time.Unix(10, 0), time.Second, "job", "c"),
					profileSeriesGenerator(t, time.Unix(11, 0), time.Unix(20, 0), time.Second, "job", "d")...,
				), profileSeriesGenerator(t, time.Unix(1, 0), time.Unix(10, 0), time.Second, "job", "a")...,
			),
			profileSeriesGenerator(t, time.Unix(11, 0), time.Unix(20, 0), time.Second, "job", "b")...,
		)
	})
	dst := t.TempDir()
	compacted, err := CompactWithSplitting(ctx, CompactWithSplittingOpts{
		Src:                []BlockReader{b1, b2, b2, b1},
		Dst:                dst,
		SplitCount:         16,
		StageSize:          8,
		SplitBy:            SplitByFingerprint,
		DownsamplerEnabled: true,
		Logger:             log.NewNopLogger(),
	})
	require.NoError(t, err)

	// No stray shared symbol directory may be left behind in dst.
	require.NoDirExists(t, filepath.Join(dst, symdb.DefaultDirName))

	// 4 shards one per series.
	require.Equal(t, 4, len(compacted))
	require.Equal(t, "1_of_16", compacted[0].Labels[sharding.CompactorShardIDLabel])
	require.Equal(t, "6_of_16", compacted[1].Labels[sharding.CompactorShardIDLabel])
	require.Equal(t, "7_of_16", compacted[2].Labels[sharding.CompactorShardIDLabel])
	require.Equal(t, "14_of_16", compacted[3].Labels[sharding.CompactorShardIDLabel])

	require.Equal(t, model.TimeFromUnix(1), compacted[1].MinTime)
	require.Equal(t, model.TimeFromUnix(20), compacted[1].MaxTime)

	// We first verify we have all series and timestamps across querying all blocks.
	queriers := make(Queriers, len(compacted))
	for i, blk := range compacted {
		queriers[i] = blockQuerierFromMeta(t, dst, blk)
	}

	err = queriers.Open(context.Background())
	require.NoError(t, err)
	matchAll := &ingesterv1.SelectProfilesRequest{
		LabelSelector: "{}",
		Type:          mustParseProfileSelector(t, "process_cpu:cpu:nanoseconds:cpu:nanoseconds"),
		Start:         0,
		End:           40000,
	}
	it, err := queriers.SelectMatchingProfiles(context.Background(), matchAll)
	require.NoError(t, err)

	// Collect every (labels, timestamps) pair per fingerprint across shards.
	seriesMap := make(map[model.Fingerprint]lo.Tuple2[phlaremodel.Labels, []model.Time])
	for it.Next() {
		r := it.At()
		seriesMap[r.Fingerprint()] = lo.T2(r.Labels().WithoutPrivateLabels(), append(seriesMap[r.Fingerprint()].B, r.Timestamp()))
	}
	require.NoError(t, it.Err())
	require.NoError(t, it.Close())
	series := lo.Values(seriesMap)
	sort.Slice(series, func(i, j int) bool {
		return phlaremodel.CompareLabelPairs(series[i].A, series[j].A) < 0
	})
	require.Equal(t, []lo.Tuple2[phlaremodel.Labels, []model.Time]{
		lo.T2(phlaremodel.LabelsFromStrings("job", "a"),
			generateTimes(t, model.TimeFromUnix(1), model.TimeFromUnix(10)),
		),
		lo.T2(phlaremodel.LabelsFromStrings("job", "b"),
			generateTimes(t, model.TimeFromUnix(11), model.TimeFromUnix(20)),
		),
		lo.T2(phlaremodel.LabelsFromStrings("job", "c"),
			generateTimes(t, model.TimeFromUnix(1), model.TimeFromUnix(10)),
		),
		lo.T2(phlaremodel.LabelsFromStrings("job", "d"),
			generateTimes(t, model.TimeFromUnix(11), model.TimeFromUnix(20)),
		),
	}, series)

	// Then we query 2 different shards and verify we have a subset of series.
	it, err = queriers[0].SelectMatchingProfiles(ctx, matchAll)
	require.NoError(t, err)
	seriesResult, err := queriers[0].MergeByLabels(context.Background(), it, nil, "job")
	require.NoError(t, err)
	require.Equal(t,
		[]*typesv1.Series{
			{
				Labels: phlaremodel.LabelsFromStrings("job", "a"),
				Points: generatePoints(t, model.TimeFromUnix(1), model.TimeFromUnix(10)),
			},
		}, seriesResult)

	it, err = queriers[1].SelectMatchingProfiles(ctx, matchAll)
	require.NoError(t, err)
	seriesResult, err = queriers[1].MergeByLabels(context.Background(), it, nil, "job")
	require.NoError(t, err)
	require.Equal(t,
		[]*typesv1.Series{
			{
				Labels: phlaremodel.LabelsFromStrings("job", "b"),
				Points: generatePoints(t, model.TimeFromUnix(11), model.TimeFromUnix(20)),
			},
		}, seriesResult)

	// Finally test some stacktraces resolution.
	it, err = queriers[1].SelectMatchingProfiles(ctx, matchAll)
	require.NoError(t, err)
	res, err := queriers[1].MergeByStacktraces(ctx, it, 0)
	require.NoError(t, err)

	// Shard 1 holds job "b": 10 one-sample profiles on the same stack.
	expected := new(phlaremodel.Tree)
	expected.InsertStack(10, "baz", "bar", "foo")
	require.Equal(t, expected.String(), res.String())
}

// profileSeriesGenerator builds one single-sample CPU profile per interval
// step in [from, through] (inclusive), all carrying lbls and the same
// foo>bar>baz stack.
// nolint:unparam
func profileSeriesGenerator(t *testing.T, from, through time.Time, interval time.Duration, lbls ...string) []*testhelper.ProfileBuilder {
	t.Helper()
	var builders []*testhelper.ProfileBuilder
	for ts := from; ts.Before(through) || ts.Equal(through); ts = ts.Add(interval) {
		builders = append(builders,
			testhelper.NewProfileBuilder(ts.UnixNano()).
				CPUProfile().
				WithLabels(
					lbls...,
				).ForStacktraceString("foo", "bar", "baz").AddSamples(1))
	}
	return builders
}

// generatePoints returns the expected value-1 points, one per second in
// [from, through] inclusive — the query-side mirror of profileSeriesGenerator.
func generatePoints(t *testing.T, from, through model.Time) []*typesv1.Point {
	t.Helper()
	var points []*typesv1.Point
	for ts := from; ts.Before(through) || ts.Equal(through); ts = ts.Add(time.Second) {
		points = append(points, &typesv1.Point{Timestamp: int64(ts), Value: 1, Annotations: []*typesv1.ProfileAnnotation{}})
	}
	return points
}

// generateTimes returns one timestamp per second in [from, through] inclusive.
func generateTimes(t *testing.T, from, through model.Time) []model.Time {
	t.Helper()
	var times []model.Time
	for ts := from; ts.Before(through) || ts.Equal(through); ts = ts.Add(time.Second) {
		times = append(times, ts)
	}
	return times
}

// TestProfileRowIterator checks that newProfileRowIterator yields a block's
// profile rows in order, exposing the expected labels (without private
// labels) and timestamps for each row.
func TestProfileRowIterator(t *testing.T) {
	b := newBlock(t, func() []*testhelper.ProfileBuilder {
		return []*testhelper.ProfileBuilder{
			testhelper.NewProfileBuilder(int64(1)).
				CPUProfile().
				WithLabels(
					"job", "a",
				).ForStacktraceString("foo", "bar", "baz").AddSamples(1),
			testhelper.NewProfileBuilder(int64(2)).
				CPUProfile().
				WithLabels(
					"job", "b",
				).ForStacktraceString("foo", "bar", "baz").AddSamples(1),
			testhelper.NewProfileBuilder(int64(3)).
				CPUProfile().
				WithLabels(
					"job", "c",
				).ForStacktraceString("foo", "bar", "baz").AddSamples(1),
		}
	})

	it, err := newProfileRowIterator(b)
	require.NoError(t, err)

	assert.True(t, it.Next())
	require.Equal(t, it.At().labels.WithoutPrivateLabels(), phlaremodel.Labels{
		&typesv1.LabelPair{Name: "job", Value: "a"},
	})
	require.Equal(t, it.At().timeNanos, int64(1))

	assert.True(t, it.Next())
	require.Equal(t, it.At().labels.WithoutPrivateLabels(), phlaremodel.Labels{
		&typesv1.LabelPair{Name: "job", Value: "b"},
	})
	require.Equal(t, it.At().timeNanos, int64(2))

	assert.True(t, it.Next())
	require.Equal(t, it.At().labels.WithoutPrivateLabels(), phlaremodel.Labels{
		&typesv1.LabelPair{Name: "job", Value: "c"},
	})
	require.Equal(t, it.At().timeNanos, int64(3))

	assert.False(t, it.Next())
	require.NoError(t, it.Err())
	require.NoError(t, it.Close())
}

// TestMergeRowProfileIterator verifies that newMergeRowProfileIterator merges
// profile rows from multiple blocks in (time, series) order, deduplicating
// identical rows, and that each emitted row's fingerprint matches the hash of
// its labels.
func TestMergeRowProfileIterator(t *testing.T) {
	type profile struct {
		timeNanos int64
		labels    phlaremodel.Labels
	}

	a, b, c := phlaremodel.Labels{
		&typesv1.LabelPair{Name: "job", Value: "a"},
	}, phlaremodel.Labels{
		&typesv1.LabelPair{Name: "job", Value: "b"},
	}, phlaremodel.Labels{
		&typesv1.LabelPair{Name: "job", Value: "c"},
	}

	for _, tc := range []struct {
		name     string
		in       [][]profile
		expected []profile
	}{
		{
			name: "only duplicates",
			in: [][]profile{
				{
					{timeNanos: 1, labels: a}, {timeNanos: 2, labels: b}, {timeNanos: 3, labels: c},
				},
				{
					{timeNanos: 1, labels: a}, {timeNanos: 2, labels: b}, {timeNanos: 3, labels: c},
				},
				{
					{timeNanos: 1, labels: a}, {timeNanos: 2, labels: b}, {timeNanos: 3, labels: c},
				},
				{
					{timeNanos: 1, labels: a}, {timeNanos: 2, labels: b}, {timeNanos: 3, labels: c},
				},
			},
			expected: []profile{
				{timeNanos: 1, labels: a}, {timeNanos: 2, labels: b}, {timeNanos: 3, labels: c},
			},
		},
		{
			name: "missing some",
			in: [][]profile{
				{
					{timeNanos: 2, labels: b}, {timeNanos: 3, labels: c}, {timeNanos: 4, labels: c},
				},
				{
					{timeNanos: 1, labels: a},
				},
				{
					{timeNanos: 2, labels: b}, {timeNanos: 3, labels: c},
				},
			},
			expected: []profile{
				{timeNanos: 1, labels: a}, {timeNanos: 2, labels: b}, {timeNanos: 3, labels: c}, {timeNanos: 4, labels: c},
			},
		},
		{
			name: "no duplicates",
			in: [][]profile{
				{
					{timeNanos: 2, labels: b},
				},
				{
					{timeNanos: 1, labels: a},
				},
				{
					{timeNanos: 3, labels: c},
				},
			},
			expected: []profile{
				{timeNanos: 1, labels: a}, {timeNanos: 2, labels: b}, {timeNanos: 3, labels: c},
			},
		},
	} {
		t.Run(tc.name, func(t *testing.T) {
			// Materialize each input slice as a real on-disk block.
			blocks := make([]BlockReader, len(tc.in))
			for i, profiles := range tc.in {
				blocks[i] = newBlock(t, func() []*testhelper.ProfileBuilder {
					var builders []*testhelper.ProfileBuilder
					for _, p := range profiles {
						prof := testhelper.NewProfileBuilder(p.timeNanos).
							CPUProfile().ForStacktraceString("foo").AddSamples(1)
						for _, l := range p.labels {
							prof.WithLabels(l.Name, l.Value)
						}
						builders = append(builders, prof)
					}
					return builders
				})
			}
			it, err := newMergeRowProfileIterator(blocks)
			require.NoError(t, err)
			actual := []profile{}
			for it.Next() {
				actual = append(actual, profile{
					timeNanos: it.At().timeNanos,
					labels:    it.At().labels.WithoutPrivateLabels(),
				})
				// Fingerprint must be the labels hash for every merged row.
				require.Equal(t, model.Fingerprint(it.At().labels.Hash()), it.At().fp)
			}
			require.NoError(t, it.Err())
			require.NoError(t, it.Close())
			require.Equal(t, tc.expected, actual)
		})
	}
}

// TestSeriesRewriter feeds a block's profile rows through newIndexRewriter
// and checks the rewritten TSDB index: series indexes are assigned densely
// per unique label set, and each series entry carries the right fingerprint,
// labels, and min/max time chunk metadata.
func TestSeriesRewriter(t *testing.T) {
	type profile struct {
		timeNanos int64
		labels    phlaremodel.Labels
	}

	// Three series: "a" with 3 samples, "b" with 1, "c" with 2.
	in := []profile{
		{1, phlaremodel.LabelsFromStrings("job", "a")},
		{2, phlaremodel.LabelsFromStrings("job", "a")},
		{3, phlaremodel.LabelsFromStrings("job", "a")},
		{2, phlaremodel.LabelsFromStrings("job", "b")},
		{1, phlaremodel.LabelsFromStrings("job", "c")},
		{2, phlaremodel.LabelsFromStrings("job", "c")},
	}

	blk := newBlock(t, func() []*testhelper.ProfileBuilder {
		var builders []*testhelper.ProfileBuilder
		for _, p := range in {
			prof := testhelper.NewProfileBuilder(p.timeNanos).
				CPUProfile().ForStacktraceString("foo").AddSamples(1)
			for _, l := range p.labels {
				prof.WithLabels(l.Name, l.Value)
			}
			builders = append(builders, prof)
		}
		return builders
	})
	rows, err := newProfileRowIterator(blk)
	require.NoError(t, err)
	path := t.TempDir()
	filePath := filepath.Join(path, block.IndexFilename)
	idxw := newIndexRewriter(path)
	seriesIdx := []uint32{}
	for rows.Next() {
		r := rows.At()
		require.NoError(t, idxw.ReWriteRow(r))
		seriesIdx = append(seriesIdx, r.row.SeriesIndex())
	}
	require.NoError(t, rows.Err())
	require.NoError(t, rows.Close())

	// Rows map onto dense series indexes in first-seen order: a=0, b=1, c=2.
	require.Equal(t, []uint32{0, 0, 0, 1, 2, 2}, seriesIdx)

	err = idxw.Close(context.Background())
	require.NoError(t, err)

	// Read the written index back and walk all postings.
	idxr, err := index.NewFileReader(filePath)
	require.NoError(t, err)
	defer idxr.Close()

	k, v := index.AllPostingsKey()
	p, err := idxr.Postings(k, nil, v)
	require.NoError(t, err)

	chunks := make([]index.ChunkMeta, 1)
	var lbs phlaremodel.Labels

	require.True(t, p.Next())
	fp, err := idxr.Series(p.At(), &lbs, &chunks)
	require.NoError(t, err)
	require.Equal(t, model.Fingerprint(lbs.Hash()), model.Fingerprint(fp))
	require.Equal(t, lbs.WithoutPrivateLabels(), phlaremodel.LabelsFromStrings("job", "a"))
	require.Equal(t, []index.ChunkMeta{{
		SeriesIndex: 0,
		MinTime:     int64(1),
		MaxTime:     int64(3),
	}}, chunks)

	require.True(t, p.Next())
	fp, err = idxr.Series(p.At(), &lbs, &chunks)
	require.NoError(t, err)
	require.Equal(t, model.Fingerprint(lbs.Hash()), model.Fingerprint(fp))
	require.Equal(t, lbs.WithoutPrivateLabels(), phlaremodel.LabelsFromStrings("job", "b"))
	require.Equal(t, []index.ChunkMeta{{
		SeriesIndex: 1,
		MinTime:     int64(2),
		MaxTime:     int64(2),
	}}, chunks)

	require.True(t, p.Next())
	fp, err = idxr.Series(p.At(), &lbs, &chunks)
	require.NoError(t, err)
	require.Equal(t, model.Fingerprint(lbs.Hash()), model.Fingerprint(fp))
	require.Equal(t, lbs.WithoutPrivateLabels(), phlaremodel.LabelsFromStrings("job", "c"))
	require.Equal(t, []index.ChunkMeta{{
		SeriesIndex: 2,
		MinTime:     int64(1),
		MaxTime:     int64(2),
	}}, chunks)
}

// TestCompactOldBlock is a regression test: a checked-in block produced by an
// older version (testdata fixture) must still compact cleanly with splitting
// and downsampling enabled.
func TestCompactOldBlock(t *testing.T) {
	meta, err := block.ReadMetaFromDir("./testdata/01HD3X85G9BGAG4S3TKPNMFG4Z")
	require.NoError(t, err)
	dst := t.TempDir()
	ctx := context.Background()
	t.Log(meta)
	bkt, err := client.NewBucket(ctx, client.Config{
		StorageBackendConfig: client.StorageBackendConfig{
			Backend: client.Filesystem,
			Filesystem: filesystem.Config{
				Directory: "./testdata/",
			},
		},
	}, "test")
	require.NoError(t, err)
	br := NewSingleBlockQuerierFromMeta(context.Background(), bkt, meta)
	require.NoError(t, br.Open(ctx))
	_, err = CompactWithSplitting(ctx, CompactWithSplittingOpts{
		Src:                []BlockReader{br},
		Dst:                dst,
		SplitCount:         2,
		StageSize:          0,
		SplitBy:            SplitByFingerprint,
		DownsamplerEnabled: true,
	})
	require.NoError(t, err)
}

// TestFlushMeta pins down the meta.json produced when a head is flushed:
// compaction bookkeeping, version, time range, stats, and the exact ordered
// list of files the block is made of.
func TestFlushMeta(t *testing.T) {
	b := newBlock(t, func() []*testhelper.ProfileBuilder {
		return []*testhelper.ProfileBuilder{
			testhelper.NewProfileBuilder(int64(time.Second*1)).
				CPUProfile().
				WithLabels(
					"job", "a",
				).ForStacktraceString("foo", "bar", "baz").AddSamples(1),
			testhelper.NewProfileBuilder(int64(time.Second*2)).
				CPUProfile().
				WithLabels(
					"job", "b",
				).ForStacktraceString("foo", "bar", "baz").AddSamples(1),
			testhelper.NewProfileBuilder(int64(time.Second*3)).
				CPUProfile().
				WithLabels(
					"job", "c",
				).ForStacktraceString("foo", "bar", "baz").AddSamples(1),
		}
	})

	// A freshly flushed block is its own (only) compaction source at level 1.
	require.Equal(t, []ulid.ULID{b.Meta().ULID}, b.Meta().Compaction.Sources)
	require.Equal(t, 1, b.Meta().Compaction.Level)
	require.Equal(t, false, b.Meta().Compaction.Deletable)
	require.Equal(t, false, b.Meta().Compaction.Failed)
	require.Equal(t, []string(nil), b.Meta().Compaction.Hints)
	require.Equal(t, []block.BlockDesc(nil), b.Meta().Compaction.Parents)
	require.Equal(t, block.MetaVersion3, b.Meta().Version)
	require.Equal(t, model.Time(1000), b.Meta().MinTime)
	require.Equal(t, model.Time(3000), b.Meta().MaxTime)
	require.Equal(t, uint64(3), b.Meta().Stats.NumSeries)
	require.Equal(t, uint64(3), b.Meta().Stats.NumSamples)
	require.Equal(t, uint64(3), b.Meta().Stats.NumProfiles)
	require.Len(t, b.Meta().Files, 8)
	require.Equal(t, "index.tsdb", b.Meta().Files[0].RelPath)
	require.Equal(t, "profiles.parquet", b.Meta().Files[1].RelPath)
	require.Equal(t, "symbols/functions.parquet", b.Meta().Files[2].RelPath)
	require.Equal(t, "symbols/index.symdb", b.Meta().Files[3].RelPath)
	require.Equal(t, "symbols/locations.parquet", b.Meta().Files[4].RelPath)
	require.Equal(t, "symbols/mappings.parquet", b.Meta().Files[5].RelPath)
	require.Equal(t, "symbols/stacktraces.symdb", b.Meta().Files[6].RelPath)
	require.Equal(t, "symbols/strings.parquet", b.Meta().Files[7].RelPath)
}

// newBlock ingests the profiles returned by generator into a fresh head,
// flushes and moves the resulting block into a local filesystem bucket, and
// returns an opened single-block querier over it. It expects exactly one
// block to be produced.
func newBlock(t testing.TB, generator func() []*testhelper.ProfileBuilder) *singleBlockQuerier {
	t.Helper()
	dir := t.TempDir()
	ctx := phlarecontext.WithLogger(context.Background(), log.NewNopLogger())
	h, err := NewHead(ctx, Config{
		DataPath:         dir,
		MaxBlockDuration: 24 * time.Hour,
		Parquet: &ParquetConfig{
			MaxBufferRowCount: 10,
		},
	}, NoLimit)
	require.NoError(t, err)

	// ingest.
	for _, p := range generator() {
		require.NoError(t, h.Ingest(ctx, p.Profile, p.UUID, p.Annotations, p.Labels...))
	}

	require.NoError(t, h.Flush(ctx))
	require.NoError(t, h.Move())

	bkt, err := client.NewBucket(ctx, client.Config{
		StorageBackendConfig: client.StorageBackendConfig{
			Backend: client.Filesystem,
			Filesystem: filesystem.Config{
				Directory: dir,
			},
		},
		Prefix: "local",
	}, "test")
	require.NoError(t, err)
	metaMap, err := block.ListBlocks(filepath.Join(dir, PathLocal), time.Time{})
	require.NoError(t, err)
	require.Len(t, metaMap, 1)
	// Single entry: grab the only meta from the map.
	var meta *block.Meta
	for _, m := range metaMap {
		meta = m
	}
	blk := NewSingleBlockQuerierFromMeta(ctx, bkt, meta)
	require.NoError(t, blk.Open(ctx))
	return blk
}

// blockQuerierFromMeta opens a single-block querier for the block described
// by m, rooted at dir on the local filesystem.
func blockQuerierFromMeta(t *testing.T, dir string, m block.Meta) *singleBlockQuerier {
	t.Helper()
	ctx := context.Background()
	bkt, err := client.NewBucket(ctx, client.Config{
		StorageBackendConfig: client.StorageBackendConfig{
			Backend: client.Filesystem,
			Filesystem: filesystem.Config{
				Directory: dir,
			},
		},
		Prefix: "",
	}, "test")
	require.NoError(t, err)
	blk := NewSingleBlockQuerierFromMeta(ctx, bkt, &m)
	require.NoError(t, blk.Open(ctx))
	return blk
}

// TestCompactMetas checks how compactMetas combines input block metas: the
// time range is the union, the compaction level is max(input levels)+1,
// sources/parents list every input, and labels are merged across inputs.
func TestCompactMetas(t *testing.T) {
	actual := compactMetas([]block.Meta{
		{
			ULID:    ulid.MustParse("00000000000000000000000001"),
			MinTime: model.TimeFromUnix(0),
			MaxTime: model.TimeFromUnix(100),
			Compaction: block.BlockMetaCompaction{
				Level:   1,
				Sources: []ulid.ULID{ulid.MustParse("00000000000000000000000001")},
			},
			Labels: map[string]string{"foo": "bar"},
		},
		{
			ULID:    ulid.MustParse("00000000000000000000000002"),
			MinTime: model.TimeFromUnix(50),
			MaxTime: model.TimeFromUnix(100),
			Compaction: block.BlockMetaCompaction{
				Level:   0,
				Sources: []ulid.ULID{ulid.MustParse("00000000000000000000000002")},
			},
			Labels: map[string]string{"bar": "buzz"},
		},
		{
			ULID:    ulid.MustParse("00000000000000000000000003"),
			MinTime: model.TimeFromUnix(50),
			MaxTime: model.TimeFromUnix(200),
			Compaction: block.BlockMetaCompaction{
				Level:   3,
				Sources: []ulid.ULID{ulid.MustParse("00000000000000000000000003")},
			},
		},
	}...)
	labels := map[string]string{"foo": "bar", "bar": "buzz"}
	require.Equal(t, model.TimeFromUnix(0), actual.MinTime)
	require.Equal(t, model.TimeFromUnix(200), actual.MaxTime)
	require.Equal(t, block.BlockMetaCompaction{
		// Highest input level was 3, so the output is level 4.
		Level: 4,
		Sources: []ulid.ULID{
			ulid.MustParse("00000000000000000000000001"),
			ulid.MustParse("00000000000000000000000002"),
			ulid.MustParse("00000000000000000000000003"),
		},
		Parents: []block.BlockDesc{
			{
				ULID:    ulid.MustParse("00000000000000000000000001"),
				MinTime: 0,
				MaxTime: 100000,
			},
			{
				ULID:    ulid.MustParse("00000000000000000000000002"),
				MinTime: 50000,
				MaxTime: 100000,
			},
			{
				ULID:    ulid.MustParse("00000000000000000000000003"),
				MinTime: 50000,
				MaxTime: 200000,
			},
		},
	}, actual.Compaction)
	require.Equal(t, labels, actual.Labels)
	require.Equal(t, block.CompactorSource, actual.Source)
}

// TestMetaFilesFromDir lays out a synthetic block directory (parquet files, a
// TSDB index, and symdb files) and verifies metaFilesFromDir discovers each
// file with the right relative path, size, and type-specific metadata
// (parquet row counts, TSDB series count).
func TestMetaFilesFromDir(t *testing.T) {
	dst := t.TempDir()
	generateParquetFile(t, filepath.Join(dst, "foo.parquet"))
	generateParquetFile(t, filepath.Join(dst, "symbols", "bar.parquet"))
	generateFile(t, filepath.Join(dst, "symbols", "index.symdb"), 100)
	generateFile(t, filepath.Join(dst, "symbols", "stacktraces.symdb"), 200)
	generateIndexFile(t, dst)
	actual, err := metaFilesFromDir(dst)

	require.NoError(t, err)
	require.Equal(t, 5, len(actual))
	require.Equal(t, []block.File{
		{
			Parquet: &block.ParquetFile{
				NumRows:      100,
				NumRowGroups: 10,
			},
			RelPath:   "foo.parquet",
			SizeBytes: fileSize(t, filepath.Join(dst, "foo.parquet")),
		},
		{
			RelPath:   block.IndexFilename,
			SizeBytes: fileSize(t, filepath.Join(dst, block.IndexFilename)),
			TSDB: &block.TSDBFile{
				NumSeries: 3,
			},
		},
		{
			Parquet: &block.ParquetFile{
				NumRows:      100,
				NumRowGroups: 10,
			},
			RelPath:   filepath.Join("symbols", "bar.parquet"),
			SizeBytes: fileSize(t, filepath.Join(dst, "symbols", "bar.parquet")),
		},
		{
			RelPath:   filepath.Join("symbols", "index.symdb"),
			SizeBytes: fileSize(t, filepath.Join(dst, "symbols", "index.symdb")),
		},
		{
			RelPath:   filepath.Join("symbols", "stacktraces.symdb"),
			SizeBytes: fileSize(t, filepath.Join(dst, "symbols", "stacktraces.symdb")),
		},
	}, actual)
}

// fileSize returns the on-disk size of path in bytes, failing the test on
// stat errors.
func fileSize(t *testing.T, path string) uint64 {
	t.Helper()
	fi, err := os.Stat(path)
	require.NoError(t, err)
	return uint64(fi.Size())
}

// generateFile creates path (and parent directories) truncated to exactly
// size bytes of zeroes.
func generateFile(t *testing.T, path string, size int) {
	t.Helper()
	require.NoError(t, os.MkdirAll(filepath.Dir(path), 0o755))
	f, err := os.Create(path)
	require.NoError(t, err)
	defer f.Close()
	require.NoError(t, f.Truncate(int64(size)))
}

// generateIndexFile writes a minimal TSDB index with three series into dir,
// giving metaFilesFromDir a real index to inspect.
func generateIndexFile(t *testing.T, dir string) {
	t.Helper()
	filePath := filepath.Join(dir, block.IndexFilename)
	idxw, err := index.NewWriter(context.Background(), filePath)
	require.NoError(t, err)
	// Symbols must be registered (in order) before series can reference them.
	require.NoError(t, idxw.AddSymbol("a"))
	require.NoError(t, idxw.AddSymbol("b"))
	require.NoError(t, idxw.AddSymbol("c"))
	addSeries(t, idxw, 0, phlaremodel.Labels{
		&typesv1.LabelPair{Name: "a", Value: "b"},
	})
	addSeries(t, idxw, 1, phlaremodel.Labels{
		&typesv1.LabelPair{Name: "a", Value: "c"},
	})
	addSeries(t, idxw, 2, phlaremodel.Labels{
		&typesv1.LabelPair{Name: "b", Value: "a"},
	})
	require.NoError(t, idxw.Close())
}

// addSeries registers one series in the index writer, using the labels hash
// as the fingerprint and idx as both the series ref and chunk series index.
func addSeries(t *testing.T, idxw *index.Writer, idx int, labels phlaremodel.Labels) {
	t.Helper()
	require.NoError(t, idxw.AddSeries(storage.SeriesRef(idx), labels, model.Fingerprint(labels.Hash()), index.ChunkMeta{SeriesIndex: uint32(idx)}))
}

// generateParquetFile writes a 100-row, single-column parquet file at path
// with at most 10 rows per row group (so: 10 row groups), matching the
// NumRows/NumRowGroups expectations in TestMetaFilesFromDir.
func generateParquetFile(t *testing.T, path string) {
	t.Helper()
	require.NoError(t, os.MkdirAll(filepath.Dir(path), 0o755))
	file, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0o644)
	require.NoError(t, err)
	defer file.Close()

	writer := parquet.NewGenericWriter[struct{ Name string }](file, parquet.MaxRowsPerRowGroup(10))
	// NOTE(review): the deferred Close's error is discarded; the writer only
	// flushes its footer on Close, so a failure here would go unnoticed.
	defer writer.Close()
	for i := 0; i < 100; i++ {
		_, err := writer.Write([]struct{ Name string }{
			{Name: fmt.Sprintf("name-%d", i)},
		})
		require.NoError(t, err)
	}
}

// Test_SplitStages checks that splitStages partitions n shard indexes into
// consecutive groups of size s, with a smaller final group when s does not
// divide n.
func Test_SplitStages(t *testing.T) {
	tests := []struct {
		n, s   int
		result [][]int
	}{
		{12, 3, [][]int{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, 10, 11}}},
		{7, 3, [][]int{{0, 1, 2}, {3, 4, 5}, {6}}},
		{10, 2, [][]int{{0, 1}, {2, 3}, {4, 5}, {6, 7}, {8, 9}}},
		{5, 5, [][]int{{0, 1, 2, 3, 4}}},
	}

	for _, test := range tests {
		assert.Equal(t, test.result, splitStages(test.n, test.s))
	}
}

// Benchmark_CompactSplit measures CompactWithSplitting (32 shards, stage
// size 32, downsampling on) over a checked-in testdata block.
func Benchmark_CompactSplit(b *testing.B) {
	ctx := phlarecontext.WithLogger(context.Background(), log.NewNopLogger())

	bkt, err := client.NewBucket(ctx, client.Config{
		StorageBackendConfig: client.StorageBackendConfig{
			Backend: client.Filesystem,
			Filesystem: filesystem.Config{
				Directory: "./testdata/",
			},
		},
		Prefix: "",
	}, "test")
	require.NoError(b, err)
	meta, err := block.ReadMetaFromDir("./testdata/01HHYG6245NWHZWVP27V8WJRT7")
	require.NoError(b, err)
	bl := NewSingleBlockQuerierFromMeta(ctx, bkt, meta)
	require.NoError(b, bl.Open(ctx))
	dst := b.TempDir()

	b.ResetTimer()
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		_, err = CompactWithSplitting(ctx, CompactWithSplittingOpts{
			Src:                []BlockReader{bl},
			Dst:                dst,
			SplitCount:         32,
			StageSize:          32,
			SplitBy:            SplitByFingerprint,
			DownsamplerEnabled: true,
			Logger:             log.NewNopLogger(),
		})
		require.NoError(b, err)
	}
}