github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/schemas/v1/profiles_test.go (about) 1 package v1 2 3 import ( 4 "bytes" 5 "fmt" 6 "io" 7 "math/rand" 8 "sort" 9 "testing" 10 11 "github.com/google/uuid" 12 "github.com/parquet-go/parquet-go" 13 "github.com/stretchr/testify/assert" 14 "github.com/stretchr/testify/require" 15 16 phlareparquet "github.com/grafana/pyroscope/pkg/parquet" 17 ) 18 19 func TestInMemoryProfilesRowReader(t *testing.T) { 20 r := NewProfilesRowReader( 21 generateProfiles(10), 22 ) 23 24 batch := make([]parquet.Row, 3) 25 count := 0 26 for { 27 n, err := r.ReadRows(batch) 28 if err != nil && err != io.EOF { 29 t.Fatal(err) 30 } 31 count += n 32 if n == 0 || err == io.EOF { 33 break 34 } 35 } 36 require.Equal(t, 10, count) 37 } 38 39 const samplesPerProfile = 100 40 41 func TestRoundtripProfile(t *testing.T) { 42 profiles := generateProfiles(1000) 43 iprofiles := generateMemoryProfiles(1000) 44 actual, err := phlareparquet.ReadAll(NewInMemoryProfilesRowReader(iprofiles)) 45 require.NoError(t, err) 46 expected, err := phlareparquet.ReadAll(NewProfilesRowReader(profiles)) 47 require.NoError(t, err) 48 require.Equal(t, expected, actual) 49 _ = expected 50 _ = actual 51 52 t.Run("EmptyOptionalField", func(t *testing.T) { 53 profiles := generateProfiles(1) 54 for _, p := range profiles { 55 p.DurationNanos = 0 56 p.Period = 0 57 p.DefaultSampleType = 0 58 p.KeepFrames = 0 59 } 60 inMemoryProfiles := generateMemoryProfiles(1) 61 for i := range inMemoryProfiles { 62 inMemoryProfiles[i].DurationNanos = 0 63 inMemoryProfiles[i].Period = 0 64 inMemoryProfiles[i].DefaultSampleType = 0 65 inMemoryProfiles[i].KeepFrames = 0 66 } 67 expected, err := phlareparquet.ReadAll(NewProfilesRowReader(profiles)) 68 require.NoError(t, err) 69 actual, err := phlareparquet.ReadAll(NewInMemoryProfilesRowReader(inMemoryProfiles)) 70 require.NoError(t, err) 71 require.Equal(t, expected, actual) 72 }) 73 t.Run("EmptyComment", func(t *testing.T) { 74 profiles := generateProfiles(1) 75 for _, p := range profiles { 76 p.Comments = nil 77 } 78 inMemoryProfiles := generateMemoryProfiles(1) 79 for i := range inMemoryProfiles { 80 inMemoryProfiles[i].Comments = nil 81 } 82 expected, err := phlareparquet.ReadAll(NewProfilesRowReader(profiles)) 83 require.NoError(t, err) 84 actual, err := phlareparquet.ReadAll(NewInMemoryProfilesRowReader(inMemoryProfiles)) 85 require.NoError(t, err) 86 require.Equal(t, expected, actual) 87 }) 88 89 t.Run("EmptySamples", func(t *testing.T) { 90 profiles := generateProfiles(1) 91 for _, p := range profiles { 92 p.Samples = nil 93 } 94 inMemoryProfiles := generateMemoryProfiles(1) 95 for i := range inMemoryProfiles { 96 inMemoryProfiles[i].Samples = Samples{} 97 } 98 expected, err := phlareparquet.ReadAll(NewProfilesRowReader(profiles)) 99 require.NoError(t, err) 100 actual, err := phlareparquet.ReadAll(NewInMemoryProfilesRowReader(inMemoryProfiles)) 101 require.NoError(t, err) 102 require.Equal(t, expected, actual) 103 }) 104 t.Run("SampleSpanID", func(t *testing.T) { 105 profiles := generateProfiles(1) 106 for _, p := range profiles { 107 for _, x := range p.Samples { 108 x.SpanID = rand.Uint64() 109 } 110 } 111 inMemoryProfiles := generateMemoryProfiles(1) 112 for i := range inMemoryProfiles { 113 spans := make([]uint64, len(inMemoryProfiles[i].Samples.Values)) 114 for j := range spans { 115 spans[j] = profiles[i].Samples[j].SpanID 116 } 117 inMemoryProfiles[i].Samples.Spans = spans 118 } 119 expected, err := phlareparquet.ReadAll(NewProfilesRowReader(profiles)) 120 require.NoError(t, err) 121 actual, err := phlareparquet.ReadAll(NewInMemoryProfilesRowReader(inMemoryProfiles)) 122 require.NoError(t, err) 123 require.Equal(t, expected, actual) 124 }) 125 } 126 127 func TestCompactSamples(t *testing.T) { 128 require.Equal(t, Samples{ 129 StacktraceIDs: []uint32{1, 2, 3, 2, 5, 1, 7, 7, 1}, 130 Values: []uint64{1, 1, 1, 1, 1, 3, 1, 0, 1}, 131 }.Compact(true), Samples{ 132 StacktraceIDs: []uint32{1, 2, 3, 5, 7}, 133 Values: []uint64{5, 2, 1, 1, 1}, 134 }) 135 136 require.Equal(t, Samples{ 137 StacktraceIDs: []uint32{1, 2, 3, 4, 5, 6, 7, 8, 9}, 138 Values: []uint64{1, 0, 1, 1, 1, 0, 1, 1, 0}, 139 }.Compact(false), Samples{ 140 StacktraceIDs: []uint32{1, 3, 4, 5, 7, 8}, 141 Values: []uint64{1, 1, 1, 1, 1, 1}, 142 }) 143 144 require.Equal(t, Samples{ 145 StacktraceIDs: []uint32{1, 2, 3}, 146 Values: []uint64{1, 2, 3}, 147 }.Compact(false), Samples{ 148 StacktraceIDs: []uint32{1, 2, 3}, 149 Values: []uint64{1, 2, 3}, 150 }) 151 } 152 153 func BenchmarkRowReader(b *testing.B) { 154 profiles := generateProfiles(1000) 155 iprofiles := generateMemoryProfiles(1000) 156 b.Run("in-memory", func(b *testing.B) { 157 b.ResetTimer() 158 for i := 0; i < b.N; i++ { 159 _, err := phlareparquet.ReadAll(NewInMemoryProfilesRowReader(iprofiles)) 160 if err != nil { 161 b.Fatal(err) 162 } 163 } 164 }) 165 b.Run("schema", func(b *testing.B) { 166 b.ResetTimer() 167 for i := 0; i < b.N; i++ { 168 _, err := phlareparquet.ReadAll(NewProfilesRowReader(profiles)) 169 if err != nil { 170 b.Fatal(err) 171 } 172 } 173 }) 174 } 175 176 func TestMergeProfiles(t *testing.T) { 177 reader := NewMergeProfilesRowReader([]parquet.RowReader{ 178 NewInMemoryProfilesRowReader([]InMemoryProfile{ 179 {SeriesIndex: 1, TimeNanos: 1}, 180 {SeriesIndex: 2, TimeNanos: 2}, 181 {SeriesIndex: 3, TimeNanos: 3}, 182 }), 183 NewInMemoryProfilesRowReader([]InMemoryProfile{ 184 {SeriesIndex: 1, TimeNanos: 4}, 185 {SeriesIndex: 2, TimeNanos: 5}, 186 {SeriesIndex: 3, TimeNanos: 6}, 187 }), 188 NewInMemoryProfilesRowReader([]InMemoryProfile{ 189 {SeriesIndex: 1, TimeNanos: 7}, 190 {SeriesIndex: 2, TimeNanos: 8}, 191 {SeriesIndex: 3, TimeNanos: 9}, 192 }), 193 }) 194 195 actual, err := phlareparquet.ReadAll(reader) 196 require.NoError(t, err) 197 compareProfileRows(t, generateProfileRow([]InMemoryProfile{ 198 {SeriesIndex: 1, TimeNanos: 1}, 199 {SeriesIndex: 1, TimeNanos: 4}, 200 {SeriesIndex: 1, TimeNanos: 7}, 201 {SeriesIndex: 2, TimeNanos: 2}, 202 {SeriesIndex: 2, TimeNanos: 5}, 203 {SeriesIndex: 2, TimeNanos: 8}, 204 {SeriesIndex: 3, TimeNanos: 3}, 205 {SeriesIndex: 3, TimeNanos: 6}, 206 {SeriesIndex: 3, TimeNanos: 9}, 207 }), actual) 208 } 209 210 func TestLessProfileRows(t *testing.T) { 211 for _, tc := range []struct { 212 a, b parquet.Row 213 expected bool 214 }{ 215 { 216 a: generateProfileRow([]InMemoryProfile{{SeriesIndex: 1, TimeNanos: 1}})[0], 217 b: generateProfileRow([]InMemoryProfile{{SeriesIndex: 1, TimeNanos: 1}})[0], 218 expected: false, 219 }, 220 { 221 a: generateProfileRow([]InMemoryProfile{{SeriesIndex: 1, TimeNanos: 1}})[0], 222 b: generateProfileRow([]InMemoryProfile{{SeriesIndex: 1, TimeNanos: 2}})[0], 223 expected: true, 224 }, 225 { 226 a: generateProfileRow([]InMemoryProfile{{SeriesIndex: 1, TimeNanos: 1}})[0], 227 b: generateProfileRow([]InMemoryProfile{{SeriesIndex: 2, TimeNanos: 1}})[0], 228 expected: true, 229 }, 230 } { 231 t.Run("", func(t *testing.T) { 232 require.Equal(t, tc.expected, lessProfileRows(tc.a, tc.b)) 233 }) 234 } 235 } 236 237 func TestProfileRowStacktraceIDs(t *testing.T) { 238 for _, tc := range []struct { 239 name string 240 expected []uint32 241 profile InMemoryProfile 242 }{ 243 {"empty", nil, InMemoryProfile{}}, 244 {"one sample", []uint32{1}, InMemoryProfile{ 245 SeriesIndex: 1, 246 StacktracePartition: 2, 247 TotalValue: 3, 248 Samples: Samples{StacktraceIDs: []uint32{1}, Values: []uint64{1}}, 249 }}, 250 {"many", []uint32{1, 1, 2, 3, 4}, InMemoryProfile{ 251 SeriesIndex: 1, 252 StacktracePartition: 2, 253 TotalValue: 3, 254 Samples: Samples{ 255 StacktraceIDs: []uint32{1, 1, 2, 3, 4}, 256 Values: []uint64{4, 2, 4, 5, 2}, 257 }, 258 }}, 259 } { 260 tc := tc 261 t.Run(tc.name, func(t *testing.T) { 262 rows := generateProfileRow([]InMemoryProfile{tc.profile}) 263 var ids []uint32 264 ProfileRow(rows[0]).ForStacktraceIDsValues(func(values []parquet.Value) { 265 for _, v := range values { 266 ids = append(ids, v.Uint32()) 267 } 268 }) 269 require.Equal(t, tc.expected, ids) 270 }) 271 } 272 } 273 274 func TestProfileRowMutateValues(t *testing.T) { 275 row := ProfileRow(generateProfileRow([]InMemoryProfile{ 276 { 277 Samples: Samples{ 278 StacktraceIDs: []uint32{1, 1, 2, 3, 4}, 279 Values: []uint64{4, 2, 4, 5, 2}, 280 }, 281 }, 282 })[0]) 283 row.ForStacktraceIDsValues(func(values []parquet.Value) { 284 for i := range values { 285 values[i] = parquet.Int32Value(1).Level(0, 1, values[i].Column()) 286 } 287 }) 288 var ids []uint32 289 row.ForStacktraceIDsValues(func(values []parquet.Value) { 290 for _, v := range values { 291 ids = append(ids, v.Uint32()) 292 } 293 }) 294 require.Equal(t, []uint32{1, 1, 1, 1, 1}, ids) 295 } 296 297 func BenchmarkProfileRows(b *testing.B) { 298 a := generateProfileRow([]InMemoryProfile{{SeriesIndex: 1, TimeNanos: 1}})[0] 299 a1 := generateProfileRow([]InMemoryProfile{{SeriesIndex: 1, TimeNanos: 2}})[0] 300 a2 := generateProfileRow([]InMemoryProfile{{SeriesIndex: 2, TimeNanos: 1}})[0] 301 302 b.ResetTimer() 303 b.ReportAllocs() 304 305 for i := 0; i < b.N; i++ { 306 lessProfileRows(a, a) 307 lessProfileRows(a, a1) 308 lessProfileRows(a, a2) 309 } 310 } 311 312 func Benchmark_SpanID_Encoding(b *testing.B) { 313 const profilesN = 1000 314 315 profiles := func(share float64) []InMemoryProfile { 316 randomSpanIDs := make([]uint64, int(samplesPerProfile*share)) 317 inMemoryProfiles := generateMemoryProfiles(profilesN) 318 for j := range inMemoryProfiles { 319 for i := range randomSpanIDs { 320 randomSpanIDs[i] = rand.Uint64() 321 } 322 spans := make([]uint64, len(inMemoryProfiles[j].Samples.Values)) 323 for o := range spans { 324 spans[o] = randomSpanIDs[o%len(randomSpanIDs)] 325 } 326 inMemoryProfiles[j].Samples.Spans = spans 327 // We only need this for RLE. 328 sort.Sort(SamplesBySpanID(inMemoryProfiles[j].Samples)) 329 } 330 return inMemoryProfiles 331 } 332 333 for _, share := range []float64{ 334 1, 335 0.5, 336 0.25, 337 0.15, 338 0.05, 339 } { 340 share := share 341 b.Run(fmt.Sprintf("%v (%d/%d)", share, int(samplesPerProfile*share), samplesPerProfile), func(b *testing.B) { 342 inMemoryProfiles := profiles(share) 343 var buf bytes.Buffer 344 w := parquet.NewGenericWriter[*Profile](&buf, ProfilesSchema) 345 346 n, err := parquet.CopyRows(w, NewInMemoryProfilesRowReader(inMemoryProfiles)) 347 require.NoError(b, err) 348 require.Equal(b, len(inMemoryProfiles), int(n)) 349 require.NoError(b, w.Close()) 350 351 b.ResetTimer() 352 b.ReportAllocs() 353 354 for i := 0; i < b.N; i++ { 355 b.ReportMetric(float64(buf.Len()), "bytes") 356 r := parquet.NewReader(bytes.NewReader(buf.Bytes()), ProfilesSchema) 357 n, err = parquet.CopyRows(parquet.MultiRowWriter(), r) 358 require.NoError(b, err) 359 require.Equal(b, len(inMemoryProfiles), int(n)) 360 } 361 }) 362 } 363 } 364 365 func compareProfileRows(t *testing.T, expected, actual []parquet.Row) { 366 t.Helper() 367 require.Equal(t, len(expected), len(actual)) 368 for i := range expected { 369 expectedProfile, actualProfile := &Profile{}, &Profile{} 370 require.NoError(t, ProfilesSchema.Reconstruct(actualProfile, actual[i])) 371 require.NoError(t, ProfilesSchema.Reconstruct(expectedProfile, expected[i])) 372 require.Equal(t, expectedProfile, actualProfile, "row %d", i) 373 } 374 } 375 376 func generateProfileRow(in []InMemoryProfile) []parquet.Row { 377 rows := make([]parquet.Row, len(in)) 378 for i, p := range in { 379 rows[i] = deconstructMemoryProfile(p, rows[i]) 380 } 381 return rows 382 } 383 384 func generateMemoryProfiles(n int) []InMemoryProfile { 385 profiles := make([]InMemoryProfile, n) 386 for i := 0; i < n; i++ { 387 stacktraceID := make([]uint32, samplesPerProfile) 388 value := make([]uint64, samplesPerProfile) 389 for j := 0; j < samplesPerProfile; j++ { 390 stacktraceID[j] = uint32(j) 391 value[j] = uint64(j) 392 } 393 profiles[i] = InMemoryProfile{ 394 ID: uuid.MustParse(fmt.Sprintf("00000000-0000-0000-0000-%012d", i)), 395 SeriesIndex: uint32(i), 396 DropFrames: 1, 397 KeepFrames: 3, 398 TimeNanos: int64(i), 399 TotalValue: 100, 400 Period: 100000, 401 DurationNanos: 1000000000, 402 Comments: []int64{1, 2, 3}, 403 DefaultSampleType: 2, 404 Samples: Samples{ 405 StacktraceIDs: stacktraceID, 406 Values: value, 407 }, 408 } 409 } 410 return profiles 411 } 412 413 func generateProfiles(n int) []*Profile { 414 profiles := make([]*Profile, n) 415 for i := 0; i < n; i++ { 416 profiles[i] = &Profile{ 417 ID: uuid.MustParse(fmt.Sprintf("00000000-0000-0000-0000-%012d", i)), 418 SeriesIndex: uint32(i), 419 DropFrames: 1, 420 KeepFrames: 3, 421 TotalValue: 100, 422 TimeNanos: int64(i), 423 Period: 100000, 424 DurationNanos: 1000000000, 425 Comments: []int64{1, 2, 3}, 426 DefaultSampleType: 2, 427 Samples: generateSamples(samplesPerProfile), 428 } 429 } 430 431 return profiles 432 } 433 434 func generateSamples(n int) []*Sample { 435 samples := make([]*Sample, n) 436 for i := 0; i < n; i++ { 437 samples[i] = &Sample{ 438 StacktraceID: uint64(i), 439 Value: int64(i), 440 } 441 } 442 return samples 443 } 444 445 func Test_SamplesFromMap(t *testing.T) { 446 m := map[uint32]uint64{ 447 1: 2, 448 0: 0, 449 2: 3, 450 3: 0, 451 } 452 samples := NewSamplesFromMap(m) 453 assert.Equal(t, len(m), cap(samples.Values)) 454 assert.Equal(t, 2, len(samples.Values)) 455 } 456 457 func Test_SamplesRange(t *testing.T) { 458 tests := []struct { 459 name string 460 input Samples 461 n, m int 462 expected Samples 463 }{ 464 { 465 name: "empty spans", 466 input: Samples{ 467 StacktraceIDs: []uint32{1, 2, 3, 4, 5}, 468 Values: []uint64{10, 20, 30, 40, 50}, 469 }, 470 n: 1, 471 m: 3, 472 expected: Samples{ 473 StacktraceIDs: []uint32{2, 3}, 474 Values: []uint64{20, 30}, 475 }, 476 }, 477 { 478 name: "non-empty Spans", 479 input: Samples{ 480 StacktraceIDs: []uint32{1, 2, 3, 4, 5}, 481 Values: []uint64{10, 20, 30, 40, 50}, 482 Spans: []uint64{100, 200, 300, 400, 500}, 483 }, 484 n: 1, 485 m: 4, 486 expected: Samples{ 487 StacktraceIDs: []uint32{2, 3, 4}, 488 Values: []uint64{20, 30, 40}, 489 Spans: []uint64{200, 300, 400}, 490 }, 491 }, 492 { 493 name: "all", 494 input: Samples{ 495 StacktraceIDs: []uint32{1, 2, 3}, 496 Values: []uint64{10, 20, 30}, 497 Spans: []uint64{100, 200, 300}, 498 }, 499 n: 0, 500 m: 3, 501 expected: Samples{ 502 StacktraceIDs: []uint32{1, 2, 3}, 503 Values: []uint64{10, 20, 30}, 504 Spans: []uint64{100, 200, 300}, 505 }, 506 }, 507 { 508 name: "oob: n < 0", 509 input: Samples{ 510 StacktraceIDs: []uint32{1, 2, 3}, 511 Values: []uint64{10, 20, 30}, 512 }, 513 n: -1, 514 m: 3, 515 }, 516 { 517 name: "oob: m > n", 518 input: Samples{ 519 StacktraceIDs: []uint32{1, 2, 3}, 520 Values: []uint64{10, 20, 30}, 521 }, 522 n: 3, 523 m: 1, 524 }, 525 { 526 name: "oob: m > len", 527 input: Samples{ 528 StacktraceIDs: []uint32{1, 2, 3}, 529 Values: []uint64{10, 20, 30}, 530 }, 531 n: 3, 532 m: 5, 533 }, 534 } 535 536 for _, tt := range tests { 537 t.Run(tt.name, func(t *testing.T) { 538 result := tt.input.Range(tt.n, tt.m) 539 assert.Equal(t, tt.expected.StacktraceIDs, result.StacktraceIDs) 540 assert.Equal(t, tt.expected.Values, result.Values) 541 assert.Equal(t, tt.expected.Spans, result.Spans) 542 }) 543 } 544 } 545 546 func TestColumnCount(t *testing.T) { 547 profiles := []InMemoryProfile{{ 548 SeriesIndex: 1, 549 TimeNanos: 2, 550 Samples: Samples{ 551 StacktraceIDs: []uint32{1, 2, 3}, 552 Values: []uint64{1, 2, 3}, 553 }, 554 }, 555 { 556 SeriesIndex: 1, 557 TimeNanos: 2, 558 Samples: Samples{ 559 StacktraceIDs: []uint32{1, 2, 3}, 560 Values: []uint64{1, 2, 3}, 561 Spans: []uint64{1, 2, 3}, 562 }, 563 }, 564 { 565 SeriesIndex: 1, 566 TimeNanos: 2, 567 Samples: Samples{ 568 StacktraceIDs: []uint32{1, 2, 3}, 569 Values: []uint64{1, 2, 3}, 570 Spans: []uint64{1, 2, 3}, 571 }, 572 Comments: []int64{1, 2, 3}, 573 }} 574 for _, profile := range profiles { 575 count := profileColumnCount(profile) 576 577 row := deconstructMemoryProfile(profile, nil) 578 assert.Equal(t, len(row), count) 579 assert.Equal(t, cap(row), count) 580 } 581 582 }