// Package v1 holds the v1 parquet schemas and row (de)serialization for
// profiles stored in phlaredb blocks.
package v1

import (
	"fmt"
	"io"
	"math"
	"sort"
	"strings"
	"unsafe"

	"github.com/google/uuid"
	"github.com/parquet-go/parquet-go"
	"github.com/prometheus/common/model"

	profilev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1"
	phlareparquet "github.com/grafana/pyroscope/pkg/parquet"
)

// Column names shared between the schema definitions below and readers
// elsewhere that look columns up by name/path.
const (
	IDColumnName                  = "ID"
	SeriesIndexColumnName         = "SeriesIndex"
	TimeNanosColumnName           = "TimeNanos"
	StacktracePartitionColumnName = "StacktracePartition"
	TotalValueColumnName          = "TotalValue"
	SamplesColumnName             = "Samples"
	AnnotationsColumnName         = "Annotations"
)

var (
	// stringRef is an int64 index into the block's string table,
	// delta-binary-packed on disk.
	stringRef   = parquet.Encoded(parquet.Int(64), &parquet.DeltaBinaryPacked)
	pprofLabels = parquet.List(phlareparquet.Group{
		phlareparquet.NewGroupField("Key", stringRef),
		phlareparquet.NewGroupField("Str", parquet.Optional(stringRef)),
		phlareparquet.NewGroupField("Num", parquet.Optional(parquet.Encoded(parquet.Int(64), &parquet.DeltaBinaryPacked))),
		phlareparquet.NewGroupField("NumUnit", parquet.Optional(stringRef)),
	})
	sampleField = phlareparquet.Group{
		phlareparquet.NewGroupField("StacktraceID", parquet.Encoded(parquet.Uint(64), &parquet.DeltaBinaryPacked)),
		phlareparquet.NewGroupField("Value", parquet.Encoded(parquet.Int(64), &parquet.DeltaBinaryPacked)),
		phlareparquet.NewGroupField("Labels", pprofLabels),
		phlareparquet.NewGroupField("SpanID", parquet.Optional(parquet.Encoded(parquet.Uint(64), &parquet.RLEDictionary))),
	}
	// ProfilesSchema is the on-disk layout of the profiles table.
	// NOTE: deconstructMemoryProfile and profileColumnCount (below in this
	// file) mirror this field order column-for-column; keep them in sync
	// when changing the schema.
	ProfilesSchema = parquet.NewSchema("Profile", phlareparquet.Group{
		phlareparquet.NewGroupField("ID", parquet.UUID()),
		phlareparquet.NewGroupField(SeriesIndexColumnName, parquet.Encoded(parquet.Uint(32), &parquet.DeltaBinaryPacked)),
		phlareparquet.NewGroupField(StacktracePartitionColumnName, parquet.Encoded(parquet.Uint(64), &parquet.DeltaBinaryPacked)),
		phlareparquet.NewGroupField(TotalValueColumnName, parquet.Encoded(parquet.Uint(64), &parquet.DeltaBinaryPacked)),
		phlareparquet.NewGroupField(SamplesColumnName, parquet.List(sampleField)),
		phlareparquet.NewGroupField("DropFrames", parquet.Optional(stringRef)),
		phlareparquet.NewGroupField("KeepFrames", parquet.Optional(stringRef)),
		phlareparquet.NewGroupField(TimeNanosColumnName, parquet.Timestamp(parquet.Nanosecond)),
		phlareparquet.NewGroupField("DurationNanos", parquet.Optional(parquet.Int(64))),
		phlareparquet.NewGroupField("Period", parquet.Optional(parquet.Int(64))),
		phlareparquet.NewGroupField("Comments", parquet.List(stringRef)),
		phlareparquet.NewGroupField("DefaultSampleType", parquet.Optional(parquet.Int(64))),
		phlareparquet.NewGroupField(AnnotationsColumnName, parquet.List(
			phlareparquet.Group{
				phlareparquet.NewGroupField("Key", parquet.String()),
				phlareparquet.NewGroupField("Value", parquet.String()),
			})),
	})
	// DownsampledProfilesSchema is a reduced schema: no ID, no per-sample
	// labels/spans, and no pprof metadata columns.
	DownsampledProfilesSchema = parquet.NewSchema("DownsampledProfile", phlareparquet.Group{
		phlareparquet.NewGroupField(SeriesIndexColumnName, parquet.Encoded(parquet.Uint(32), &parquet.DeltaBinaryPacked)),
		phlareparquet.NewGroupField(StacktracePartitionColumnName, parquet.Encoded(parquet.Uint(64), &parquet.DeltaBinaryPacked)),
		phlareparquet.NewGroupField(TotalValueColumnName, parquet.Encoded(parquet.Uint(64), &parquet.DeltaBinaryPacked)),
		phlareparquet.NewGroupField(SamplesColumnName, parquet.List(
			phlareparquet.Group{
				phlareparquet.NewGroupField("StacktraceID", parquet.Encoded(parquet.Uint(64), &parquet.DeltaBinaryPacked)),
				phlareparquet.NewGroupField("Value", parquet.Encoded(parquet.Int(64), &parquet.DeltaBinaryPacked)),
			})),
		phlareparquet.NewGroupField(TimeNanosColumnName, parquet.Timestamp(parquet.Nanosecond)),
		phlareparquet.NewGroupField(AnnotationsColumnName, parquet.List(
			phlareparquet.Group{
				phlareparquet.NewGroupField("Key", parquet.String()),
				phlareparquet.NewGroupField("Value", parquet.String()),
			})),
	})

	// Dotted column paths for Schema.Lookup of repeated leaf columns.
	sampleStacktraceIDColumnPath = strings.Split("Samples.list.element.StacktraceID", ".")
	SampleValueColumnPath        = strings.Split("Samples.list.element.Value", ".")
	sampleSpanIDColumnPath       = strings.Split("Samples.list.element.SpanID", ".")

	// maxProfileRow is a sentinel row with maximal sort keys, used by
	// NewMergeProfilesRowReader as the merge upper bound.
	maxProfileRow parquet.Row
	// Leaf column indexes into ProfilesSchema, resolved once in init().
	seriesIndexColIndex         int
	stacktraceIDColIndex        int
	valueColIndex               int
	timeNanoColIndex            int
	stacktracePartitionColIndex int
	totalValueColIndex          int

	AnnotationKeyColumnPath    = strings.Split("Annotations.list.element.Key", ".")
	AnnotationValueColumnPath  = strings.Split("Annotations.list.element.Value", ".")
	annotationKeyColumnIndex   int
	annotationValueColumnIndex int

	// Leaf column indexes into DownsampledProfilesSchema.
	downsampledValueColIndex           int
	downsampledAnnotationValueColIndex int

	// ErrColumnNotFound is returned by ResolveColumnByPath when a path is
	// absent from the schema.
	ErrColumnNotFound = fmt.Errorf("column path not found")
)

// init resolves all leaf column indexes used by the row accessors in this
// file, and builds maxProfileRow. Any lookup failure means the schema and
// the hard-coded paths diverged, which is a programming error — hence panic.
func init() {
	// Build the sentinel row first; deconstructMemoryProfile does not
	// depend on the column indexes resolved below.
	maxProfileRow = deconstructMemoryProfile(InMemoryProfile{
		SeriesIndex: math.MaxUint32,
		TimeNanos:   math.MaxInt64,
	}, maxProfileRow)
	seriesCol, ok := ProfilesSchema.Lookup(SeriesIndexColumnName)
	if !ok {
		panic(fmt.Errorf("SeriesIndex index column not found"))
	}
	seriesIndexColIndex = seriesCol.ColumnIndex
	timeCol, ok := ProfilesSchema.Lookup(TimeNanosColumnName)
	if !ok {
		panic(fmt.Errorf("TimeNanos column not found"))
	}
	timeNanoColIndex = timeCol.ColumnIndex
	stacktraceIDCol, ok := ProfilesSchema.Lookup(sampleStacktraceIDColumnPath...)
	if !ok {
		panic(fmt.Errorf("StacktraceID column not found"))
	}
	stacktraceIDColIndex = stacktraceIDCol.ColumnIndex
	valueCol, ok := ProfilesSchema.Lookup(SampleValueColumnPath...)
	if !ok {
		panic(fmt.Errorf("Sample.Value column not found"))
	}
	valueColIndex = valueCol.ColumnIndex
	stacktracePartitionCol, ok := ProfilesSchema.Lookup(StacktracePartitionColumnName)
	if !ok {
		panic(fmt.Errorf("StacktracePartition column not found"))
	}
	stacktracePartitionColIndex = stacktracePartitionCol.ColumnIndex
	totalValueCol, ok := ProfilesSchema.Lookup(TotalValueColumnName)
	if !ok {
		panic(fmt.Errorf("TotalValue column not found"))
	}
	totalValueColIndex = totalValueCol.ColumnIndex

	downsampledValueCol, ok := DownsampledProfilesSchema.Lookup(SampleValueColumnPath...)
	if !ok {
		panic(fmt.Errorf("Sample.Value column not found"))
	}
	downsampledValueColIndex = downsampledValueCol.ColumnIndex
	downsampledAnnotationValueCol, ok := DownsampledProfilesSchema.Lookup(AnnotationValueColumnPath...)
	if !ok {
		panic(fmt.Errorf("Annotation.Value column not found"))
	}
	downsampledAnnotationValueColIndex = downsampledAnnotationValueCol.ColumnIndex

	annotationKeyColumn, ok := ProfilesSchema.Lookup(AnnotationKeyColumnPath...)
	if !ok {
		panic(fmt.Errorf("annotation key column not found"))
	}
	annotationKeyColumnIndex = annotationKeyColumn.ColumnIndex
	annotationValueColum, ok := ProfilesSchema.Lookup(AnnotationValueColumnPath...)
	if !ok {
		panic(fmt.Errorf("annotation value column not found"))
	}
	annotationValueColumnIndex = annotationValueColum.ColumnIndex
}

// SampleColumns bundles the resolved leaf columns of the Samples list for a
// given (possibly older) profiles schema.
type SampleColumns struct {
	StacktraceID parquet.LeafColumn
	Value        parquet.LeafColumn
	SpanID       parquet.LeafColumn
}

// Resolve looks up the sample columns in schema. StacktraceID and Value are
// mandatory; SpanID is optional and left zero-valued when absent (see
// HasSpanID).
func (c *SampleColumns) Resolve(schema *parquet.Schema) error {
	var err error
	if c.StacktraceID, err = ResolveColumnByPath(schema, sampleStacktraceIDColumnPath); err != nil {
		return err
	}
	if c.Value, err = ResolveColumnByPath(schema, SampleValueColumnPath); err != nil {
		return err
	}
	// Optional.
	c.SpanID, _ = ResolveColumnByPath(schema, sampleSpanIDColumnPath)
	return nil
}

// HasSpanID reports whether the schema this instance was resolved against
// contains the Samples SpanID column.
func (c *SampleColumns) HasSpanID() bool {
	return c.SpanID.Node != nil
}

// ResolveColumnByPath returns the leaf column at path, or an error wrapping
// ErrColumnNotFound if the schema has no such path.
func ResolveColumnByPath(schema *parquet.Schema, path []string) (parquet.LeafColumn, error) {
	if c, ok := schema.Lookup(path...); ok {
		return c, nil
	}
	return parquet.LeafColumn{}, fmt.Errorf("%w: %v", ErrColumnNotFound, path)
}

// Sample is one element of a Profile's Samples list.
type Sample struct {
	StacktraceID uint64             `parquet:",delta"`
	Value        int64              `parquet:",delta"`
	Labels       []*profilev1.Label `parquet:",list"`
	SpanID       uint64             `parquet:",optional"`
}

// Profile is the struct representation of one row of ProfilesSchema.
type Profile struct {
	// A UUID per ingested profile
	ID uuid.UUID `parquet:",uuid"`

	// SeriesIndex references the underlying series and is generated when
	// writing the TSDB index. The SeriesIndex is different from block to
	// block.
	SeriesIndex uint32 `parquet:",delta"`

	// StacktracePartition is the partition ID of the stacktrace table that this profile belongs to.
	StacktracePartition uint64 `parquet:",delta"`

	// TotalValue is the sum of all values in the profile.
	TotalValue uint64 `parquet:",delta"`

	// SeriesFingerprint references the underlying series and is purely based
	// on the label values. The value is consistent for the same label set (so
	// also between different blocks). Not persisted to parquet.
	SeriesFingerprint model.Fingerprint `parquet:"-"`

	// The set of samples recorded in this profile.
	Samples []*Sample `parquet:",list"`

	// frames with Function.function_name fully matching the following
	// regexp will be dropped from the samples, along with their successors.
	DropFrames int64 `parquet:",optional"` // Index into string table.
	// frames with Function.function_name fully matching the following
	// regexp will be kept, even if it matches drop_frames.
	KeepFrames int64 `parquet:",optional"` // Index into string table.
	// Time of collection (UTC) represented as nanoseconds past the epoch.
	TimeNanos int64 `parquet:",delta,timestamp(nanosecond)"`
	// Duration of the profile, if a duration makes sense.
	DurationNanos int64 `parquet:",delta,optional"`
	// The number of events between sampled occurrences.
	Period int64 `parquet:",optional"`
	// Freeform text associated to the profile.
	Comments []int64 `parquet:",list"` // Indices into string table.
	// Index into the string table of the type of the preferred sample
	// value. If unset, clients should default to the last sample value.
	DefaultSampleType int64 `parquet:",optional"`

	// Additional metadata about the profile
	Annotations []*Annotation `parquet:",list"`
}

// Annotation is a single key/value metadata entry attached to a profile.
type Annotation struct {
	Key   string `parquet:","`
	Value string `parquet:","`
}

// Annotations is the columnar (struct-of-slices) counterpart of []*Annotation
// used by InMemoryProfile. Keys and Values are parallel slices.
type Annotations struct {
	Keys   []string
	Values []string
}

// Timestamp returns the profile collection time as a Prometheus model.Time.
func (p Profile) Timestamp() model.Time {
	return model.TimeFromUnixNano(p.TimeNanos)
}

// Total sums the values of all samples in the profile.
func (p Profile) Total() int64 {
	var total int64
	for _, sample := range p.Samples {
		total += sample.Value
	}
	return total
}

// ProfilePersister adapts Profile to the generic table persister interface:
// it names the table and converts between Profile and parquet.Row.
type ProfilePersister struct{}

func (*ProfilePersister) Name() string {
	return "profiles"
}

func (*ProfilePersister) Schema() *parquet.Schema {
	return ProfilesSchema
}

// Deconstruct serializes s into row using ProfilesSchema.
func (*ProfilePersister) Deconstruct(row parquet.Row, s *Profile) parquet.Row {
	row = ProfilesSchema.Deconstruct(row, s)
	return row
}

// Reconstruct deserializes a row back into a Profile.
func (*ProfilePersister) Reconstruct(row parquet.Row) (s *Profile, err error) {
	var profile Profile
	if err := ProfilesSchema.Reconstruct(&profile, row); err != nil {
		return nil, err
	}
	return &profile, nil
}

// SliceRowReader is a parquet.RowReader over an in-memory slice, serializing
// each element with the provided function.
type SliceRowReader[T any] struct {
	slice     []T
	serialize func(T, parquet.Row) parquet.Row
}

// NewProfilesRowReader returns a row reader that serializes *Profile values
// via ProfilesSchema.Deconstruct.
func NewProfilesRowReader(slice []*Profile) *SliceRowReader[*Profile] {
	return &SliceRowReader[*Profile]{
		slice: slice,
		serialize: func(p *Profile, r parquet.Row) parquet.Row {
			return ProfilesSchema.Deconstruct(r, p)
		},
	}
}

// ReadRows fills rows with serialized elements and advances the reader.
// It may return n > 0 together with io.EOF when the slice is exhausted
// within this call, per the parquet.RowReader contract.
func (r *SliceRowReader[T]) ReadRows(rows []parquet.Row) (n int, err error) {
	if len(r.slice) == 0 {
		return 0, io.EOF
	}
	if len(rows) > len(r.slice) {
		rows = rows[:len(r.slice)]
		err = io.EOF
	}
	for pos, p := range r.slice[:len(rows)] {
		// Serialize the row. Note that the row may
		// be already initialized and contain values,
		// therefore it must be reset.
		row := rows[pos][:0]
		rows[pos] = r.serialize(p, row)
		n++
	}
	r.slice = r.slice[len(rows):]
	return n, err
}

// InMemoryProfile is the columnar in-memory representation of a profile row,
// serialized directly by deconstructMemoryProfile without going through the
// reflection-based schema deconstruction.
type InMemoryProfile struct {
	// A UUID per ingested profile
	ID uuid.UUID

	// SeriesIndex references the underlying series and is generated when
	// writing the TSDB index. The SeriesIndex is different from block to
	// block.
	SeriesIndex uint32

	// StacktracePartition is the partition ID of the stacktrace table that this profile belongs to.
	StacktracePartition uint64

	// TotalValue is the sum of all values in the profile.
	TotalValue uint64

	// SeriesFingerprint references the underlying series and is purely based
	// on the label values. The value is consistent for the same label set (so
	// also between different blocks).
	SeriesFingerprint model.Fingerprint

	// frames with Function.function_name fully matching the following
	// regexp will be dropped from the samples, along with their successors.
	DropFrames int64
	// frames with Function.function_name fully matching the following
	// regexp will be kept, even if it matches drop_frames.
	KeepFrames int64
	// Time of collection (UTC) represented as nanoseconds past the epoch.
	TimeNanos int64
	// Duration of the profile, if a duration makes sense.
	DurationNanos int64
	// The number of events between sampled occurrences.
	Period int64
	// Freeform text associated to the profile.
	Comments []int64
	// Index into the string table of the type of the preferred sample
	// value. If unset, clients should default to the last sample value.
	DefaultSampleType int64

	Samples Samples

	Annotations Annotations
}

// Samples is a struct-of-slices of profile samples. StacktraceIDs and Values
// are parallel; Spans is either nil or parallel to the other two.
type Samples struct {
	StacktraceIDs []uint32
	Values        []uint64
	// Span associated with samples.
	// Optional: Spans == nil, if not present.
	Spans []uint64
}

// NewSamples allocates a Samples of the given length (Spans left nil).
func NewSamples(size int) Samples {
	return Samples{
		StacktraceIDs: make([]uint32, size),
		Values:        make([]uint64, size),
	}
}

// NewSamplesFromMap converts a stacktraceID→value map into sorted Samples,
// skipping the zero stacktrace ID and zero values.
func NewSamplesFromMap(m map[uint32]uint64) Samples {
	s := Samples{
		StacktraceIDs: make([]uint32, len(m)),
		Values:        make([]uint64, len(m)),
	}
	var i int
	for k, v := range m {
		if k != 0 && v > 0 {
			s.StacktraceIDs[i] = k
			s.Values[i] = v
			i++
		}
	}
	s.StacktraceIDs = s.StacktraceIDs[:i]
	s.Values = s.Values[:i]
	// Sort by stacktrace ID (map iteration order is random).
	sort.Sort(s)
	return s
}

// Compact zero samples and optionally duplicates.
func (s Samples) Compact(dedupe bool) Samples {
	if len(s.StacktraceIDs) == 0 {
		return s
	}
	if dedupe {
		s = trimDuplicateSamples(s)
	}
	return trimZeroAndNegativeSamples(s)
}

// Clone returns a deep copy of the samples.
func (s Samples) Clone() Samples {
	return cloneSamples(s)
}

// Range returns the sub-slice view [n, m) of the samples, or an empty
// Samples if the bounds are invalid. The result shares backing arrays
// with s.
func (s Samples) Range(n, m int) Samples {
	if n < 0 || n > m || m > s.Len() {
		return Samples{}
	}
	x := Samples{
		StacktraceIDs: s.StacktraceIDs[n:m],
		Values:        s.Values[n:m],
	}
	if len(s.Spans) > 0 {
		x.Spans = s.Spans[n:m]
	}
	return x
}

// trimDuplicateSamples sorts samples by stacktrace ID and merges runs of
// equal IDs by summing their values, compacting in place.
// NOTE(review): the result drops Spans and the merge loop does not move
// them — presumably dedupe is only requested for span-less samples; confirm
// at call sites.
func trimDuplicateSamples(samples Samples) Samples {
	sort.Sort(samples)
	n := 0
	for j := 1; j < len(samples.StacktraceIDs); j++ {
		if samples.StacktraceIDs[n] == samples.StacktraceIDs[j] {
			samples.Values[n] += samples.Values[j]
		} else {
			n++
			samples.StacktraceIDs[n] = samples.StacktraceIDs[j]
			samples.Values[n] = samples.Values[j]
		}
	}
	return Samples{
		StacktraceIDs: samples.StacktraceIDs[:n+1],
		Values:        samples.Values[:n+1],
	}
}

// trimZeroAndNegativeSamples removes entries whose value is zero, compacting
// in place and keeping Spans aligned when present. (Values is unsigned, so
// the v > 0 test only drops zeros here.)
func trimZeroAndNegativeSamples(samples Samples) Samples {
	n := 0
	for j, v := range samples.Values {
		if v > 0 {
			samples.Values[n] = v
			samples.StacktraceIDs[n] = samples.StacktraceIDs[j]
			if len(samples.Spans) > 0 {
				samples.Spans[n] = samples.Spans[j]
			}
			n++
		}
	}
	s := Samples{
		StacktraceIDs: samples.StacktraceIDs[:n],
		Values:        samples.Values[:n],
	}
	if len(samples.Spans) > 0 {
		s.Spans = samples.Spans[:n]
	}
	return s
}

// cloneSamples deep-copies all three slices (Spans stays nil if empty).
func cloneSamples(samples Samples) Samples {
	return Samples{
		StacktraceIDs: copySlice(samples.StacktraceIDs),
		Values:        copySlice(samples.Values),
		Spans:         copySlice(samples.Spans),
	}
}

// sort.Interface over Samples, ordering by stacktrace ID.
func (s Samples) Less(i, j int) bool {
	return s.StacktraceIDs[i] < s.StacktraceIDs[j]
}

func (s Samples) Swap(i, j int) {
	s.StacktraceIDs[i], s.StacktraceIDs[j] = s.StacktraceIDs[j], s.StacktraceIDs[i]
	s.Values[i], s.Values[j] = s.Values[j], s.Values[i]
	if len(s.Spans) > 0 {
		s.Spans[i], s.Spans[j] = s.Spans[j], s.Spans[i]
	}
}

func (s Samples) Len() int {
	return len(s.StacktraceIDs)
}

// SamplesBySpanID is a sort.Interface view of Samples ordered by span ID.
// Its Len is len(Spans), so it only makes sense when Spans is populated.
type SamplesBySpanID Samples

func (s SamplesBySpanID) Less(i, j int) bool {
	return s.Spans[i] < s.Spans[j]
}

func (s SamplesBySpanID) Swap(i, j int) {
	s.StacktraceIDs[i], s.StacktraceIDs[j] = s.StacktraceIDs[j], s.StacktraceIDs[i]
	s.Values[i], s.Values[j] = s.Values[j], s.Values[i]
	if len(s.Spans) > 0 {
		s.Spans[i], s.Spans[j] = s.Spans[j], s.Spans[i]
	}
}

func (s SamplesBySpanID) Len() int {
	return len(s.Spans)
}

// Sum returns the total of all sample values.
func (s Samples) Sum() uint64 {
	var sum uint64
	for _, v := range s.Values {
		sum += v
	}
	return sum
}

const profileSize = uint64(unsafe.Sizeof(InMemoryProfile{}))

// Size estimates the in-memory footprint of the profile in bytes, based on
// slice capacities (Spans and Annotations are not accounted for here).
func (p InMemoryProfile) Size() uint64 {
	size := profileSize + uint64(cap(p.Comments)*8)
	// 4 bytes for stacktrace id and 8 bytes for each stacktrace value
	return size + uint64(cap(p.Samples.StacktraceIDs)*(4+8))
}

// Timestamp returns the profile collection time as a Prometheus model.Time.
func (p InMemoryProfile) Timestamp() model.Time {
	return model.TimeFromUnixNano(p.TimeNanos)
}

// Total sums all sample values (converted to signed).
func (p InMemoryProfile) Total() int64 {
	var total int64
	for _, sample := range p.Samples.Values {
		total += int64(sample)
	}
	return total
}

// copySlice returns a copy of in, or nil for an empty input.
func copySlice[T any](in []T) []T {
	if len(in) == 0 {
		return nil
	}
	out := make([]T, len(in))
	copy(out, in)
	return out
}

// NewInMemoryProfilesRowReader returns a row reader that serializes
// InMemoryProfile values with the hand-rolled deconstructMemoryProfile.
func NewInMemoryProfilesRowReader(slice []InMemoryProfile) *SliceRowReader[InMemoryProfile] {
	return &SliceRowReader[InMemoryProfile]{
		slice:     slice,
		serialize: deconstructMemoryProfile,
	}
}

// deconstructMemoryProfile serializes imp into a parquet row, appending one
// value per leaf column in exactly the column order of ProfilesSchema.
// Repeated (list) columns follow the usual Dremel convention as written
// here: repetition level 0 on the first element, 1 on subsequent ones, and
// a single empty value at definition level 0 when the list is empty.
// Optional scalar columns get definition level 1 when set, or an empty
// value at level 0 when zero. Keep this, profileColumnCount, and
// ProfilesSchema in sync.
func deconstructMemoryProfile(imp InMemoryProfile, row parquet.Row) parquet.Row {
	var (
		col    = -1
		newCol = func() int {
			col++
			return col
		}
		totalCols = profileColumnCount(imp)
	)

	if cap(row) < totalCols {
		row = make(parquet.Row, 0, totalCols)
	}
	row = row[:0]
	// Fixed leading columns: ID, SeriesIndex, StacktracePartition, TotalValue.
	row = append(row, parquet.FixedLenByteArrayValue(imp.ID[:]).Level(0, 0, newCol()))
	row = append(row, parquet.Int32Value(int32(imp.SeriesIndex)).Level(0, 0, newCol()))
	row = append(row, parquet.Int64Value(int64(imp.StacktracePartition)).Level(0, 0, newCol()))
	row = append(row, parquet.Int64Value(int64(imp.TotalValue)).Level(0, 0, newCol()))

	// Samples.StacktraceID (repeated).
	newCol()
	repetition := -1
	if len(imp.Samples.Values) == 0 {
		row = append(row, parquet.Value{}.Level(0, 0, col))
	}
	for i := range imp.Samples.StacktraceIDs {
		if repetition < 1 {
			repetition++
		}
		row = append(row, parquet.Int64Value(int64(imp.Samples.StacktraceIDs[i])).Level(repetition, 1, col))
	}

	// Samples.Value (repeated).
	newCol()
	repetition = -1
	if len(imp.Samples.Values) == 0 {
		row = append(row, parquet.Value{}.Level(0, 0, col))
	}
	for i := range imp.Samples.Values {
		if repetition < 1 {
			repetition++
		}
		row = append(row, parquet.Int64Value(int64(imp.Samples.Values[i])).Level(repetition, 1, col))
	}

	// The 4 Labels leaf columns (Key, Str, Num, NumUnit): always written
	// empty, one placeholder per sample.
	for i := 0; i < 4; i++ {
		newCol()
		repetition := -1
		if len(imp.Samples.Values) == 0 {
			row = append(row, parquet.Value{}.Level(0, 0, col))
		}
		for range imp.Samples.Values {
			if repetition < 1 {
				repetition++
			}
			row = append(row, parquet.Value{}.Level(repetition, 1, col))
		}
	}

	// Samples.SpanID (optional within the list, hence definition level 2
	// when present).
	newCol()
	repetition = -1
	if len(imp.Samples.Spans) == 0 {
		// Fill the row with empty entries (one per value).
		if len(imp.Samples.Values) == 0 {
			row = append(row, parquet.Value{}.Level(0, 0, col))
		}
		for range imp.Samples.Values {
			if repetition < 1 {
				repetition++
			}
			row = append(row, parquet.Value{}.Level(repetition, 1, col))
		}
	} else {
		for i := range imp.Samples.Spans {
			if repetition < 1 {
				repetition++
			}
			row = append(row, parquet.Int64Value(int64(imp.Samples.Spans[i])).Level(repetition, 2, col))
		}
	}

	// Optional scalars: zero encodes as "absent" (definition level 0).
	if imp.DropFrames == 0 {
		row = append(row, parquet.Value{}.Level(0, 0, newCol()))
	} else {
		row = append(row, parquet.Int64Value(imp.DropFrames).Level(0, 1, newCol()))
	}
	if imp.KeepFrames == 0 {
		row = append(row, parquet.Value{}.Level(0, 0, newCol()))
	} else {
		row = append(row, parquet.Int64Value(imp.KeepFrames).Level(0, 1, newCol()))
	}
	row = append(row, parquet.Int64Value(imp.TimeNanos).Level(0, 0, newCol()))
	if imp.DurationNanos == 0 {
		row = append(row, parquet.Value{}.Level(0, 0, newCol()))
	} else {
		row = append(row, parquet.Int64Value(imp.DurationNanos).Level(0, 1, newCol()))
	}
	if imp.Period == 0 {
		row = append(row, parquet.Value{}.Level(0, 0, newCol()))
	} else {
		row = append(row, parquet.Int64Value(imp.Period).Level(0, 1, newCol()))
	}
	// Comments (repeated).
	newCol()
	if len(imp.Comments) == 0 {
		row = append(row, parquet.Value{}.Level(0, 0, col))
	}
	repetition = -1
	for i := range imp.Comments {
		if repetition < 1 {
			repetition++
		}
		row = append(row, parquet.Int64Value(imp.Comments[i]).Level(repetition, 1, col))
	}
	if imp.DefaultSampleType == 0 {
		row = append(row, parquet.Value{}.Level(0, 0, newCol()))
	} else {
		row = append(row, parquet.Int64Value(imp.DefaultSampleType).Level(0, 1, newCol()))
	}

	// Annotations.Key (repeated).
	newCol()
	if len(imp.Annotations.Keys) == 0 {
		row = append(row, parquet.Value{}.Level(0, 0, col))
	}
	repetition = -1
	for i := range imp.Annotations.Keys {
		if repetition < 1 {
			repetition++
		}
		row = append(row, parquet.ByteArrayValue([]byte(imp.Annotations.Keys[i])).Level(repetition, 1, col))
	}

	// Annotations.Value (repeated).
	newCol()
	if len(imp.Annotations.Values) == 0 {
		row = append(row, parquet.Value{}.Level(0, 0, col))
	}
	repetition = -1
	for i := range imp.Annotations.Values {
		if repetition < 1 {
			repetition++
		}
		row = append(row, parquet.ByteArrayValue([]byte(imp.Annotations.Values[i])).Level(repetition, 1, col))
	}

	return row
}

// profileColumnCount returns the exact number of parquet values that
// deconstructMemoryProfile appends for imp: 10 fixed/optional scalar
// columns, 7 values per sample (StacktraceID, Value, 4 Labels leaves,
// SpanID), one value per comment and two per annotation — plus one empty
// placeholder per column for each empty list.
func profileColumnCount(imp InMemoryProfile) int {
	var totalCols = 10 + (7 * len(imp.Samples.StacktraceIDs)) + len(imp.Comments) + 2*len(imp.Annotations.Keys)
	if len(imp.Comments) == 0 {
		totalCols++
	}
	if len(imp.Samples.StacktraceIDs) == 0 {
		totalCols += 7
	}
	if len(imp.Annotations.Keys) == 0 {
		totalCols += 2
	}
	return totalCols
}

// NewMergeProfilesRowReader k-way merges the given readers in
// (SeriesIndex, TimeNanos) order, using maxProfileRow as the sentinel
// upper bound.
func NewMergeProfilesRowReader(rowGroups []parquet.RowReader) parquet.RowReader {
	if len(rowGroups) == 0 {
		return phlareparquet.EmptyRowReader
	}
	return phlareparquet.NewMergeRowReader(rowGroups, maxProfileRow, lessProfileRows)
}

// lessProfileRows orders profile rows by SeriesIndex, then TimeNanos.
func lessProfileRows(r1, r2 parquet.Row) bool {
	// We can directly lookup the series index column and compare it
	// because it's after only fixed length column
	sv1, sv2 := r1[seriesIndexColIndex].Uint32(), r2[seriesIndexColIndex].Uint32()
	if sv1 != sv2 {
		return sv1 < sv2
	}
	// we need to find the TimeNanos column and compare it
	// but it's after repeated columns, so we search from the end to avoid
	// going through samples
	var ts1, ts2 int64
	for i := len(r1) - 1; i >= 0; i-- {
		if r1[i].Column() == timeNanoColIndex {
			ts1 = r1[i].Int64()
			break
		}
	}
	for i := len(r2) - 1; i >= 0; i-- {
		if r2[i].Column() == timeNanoColIndex {
			ts2 = r2[i].Int64()
			break
		}
	}
	return ts1 < ts2
}

// ProfileRow is a raw ProfilesSchema row with accessors that exploit the
// known column layout: the leading columns are fixed-position, repeated
// columns are located by scanning for their column index.
type ProfileRow parquet.Row

func (p ProfileRow) SeriesIndex() uint32 {
	return p[seriesIndexColIndex].Uint32()
}

func (p ProfileRow) StacktracePartitionID() uint64 {
	return p[stacktracePartitionColIndex].Uint64()
}

func (p ProfileRow) TotalValue() int64 { return p[totalValueColIndex].Int64() }

// TimeNanos scans from the end of the row (TimeNanos sits after the
// repeated sample columns) and returns 0 if the column is not found.
func (p ProfileRow) TimeNanos() int64 {
	var ts int64
	for i := len(p) - 1; i >= 0; i-- {
		if p[i].Column() == timeNanoColIndex {
			ts = p[i].Int64()
			break
		}
	}
	return ts
}

// ForAnnotations locates the annotation Key and Value column spans and, if
// the row has any annotations (definition level 1 means a present list
// element), invokes fn with the two parallel value slices.
func (p ProfileRow) ForAnnotations(fn func([]parquet.Value, []parquet.Value)) {
	startKeys := -1
	endKeys := -1
	startValues := -1
	var i int
	for i = 0; i < len(p); i++ {
		col := p[i].Column()
		if col == annotationKeyColumnIndex && p[i].DefinitionLevel() == 1 {
			if startKeys == -1 {
				startKeys = i
			}
		}
		if col > annotationKeyColumnIndex && endKeys == -1 {
			endKeys = i
		}
		if col == annotationValueColumnIndex && p[i].DefinitionLevel() == 1 {
			if startValues == -1 {
				startValues = i
			}
		}
		if col > annotationValueColumnIndex {
			break
		}
	}

	if startKeys != -1 && startValues != -1 {
		fn(p[startKeys:endKeys], p[startValues:i])
	}
}

// SetSeriesIndex overwrites the SeriesIndex column value in place.
func (p ProfileRow) SetSeriesIndex(v uint32) {
	p[seriesIndexColIndex] = parquet.Int32Value(int32(v)).Level(0, 0, seriesIndexColIndex)
}

// SetStacktracePartitionID overwrites the StacktracePartition column value
// in place.
func (p ProfileRow) SetStacktracePartitionID(v uint64) {
	p[stacktracePartitionColIndex] = parquet.Int64Value(int64(v)).Level(0, 0, stacktracePartitionColIndex)
}

// ForStacktraceIDsValues invokes fn with the span of present StacktraceID
// values (definition level 1), if any. The values are mutable in place.
func (p ProfileRow) ForStacktraceIDsValues(fn func([]parquet.Value)) {
	start := -1
	var i int
	for i = 0; i < len(p); i++ {
		col := p[i].Column()
		if col == stacktraceIDColIndex && p[i].DefinitionLevel() == 1 {
			if start == -1 {
				start = i
			}
		}
		if col > stacktraceIDColIndex {
			break
		}
	}
	if start != -1 {
		fn(p[start:i])
	}
}

// ForStacktraceIdsAndValues invokes fn with the parallel spans of present
// StacktraceID and Value columns, if the row has any samples.
func (p ProfileRow) ForStacktraceIdsAndValues(fn func([]parquet.Value, []parquet.Value)) {
	startStacktraces := -1
	endStacktraces := -1
	startValues := -1
	endValues := -1
	var i int
	for i = 0; i < len(p); i++ {
		col := p[i].Column()
		if col == stacktraceIDColIndex && p[i].DefinitionLevel() == 1 {
			if startStacktraces == -1 {
				startStacktraces = i
			}
		}
		if col > stacktraceIDColIndex && endStacktraces == -1 {
			endStacktraces = i
		}
		if col == valueColIndex && p[i].DefinitionLevel() == 1 {
			if startValues == -1 {
				startValues = i
			}
		}
		if col > valueColIndex && endValues == -1 {
			endValues = i
			break
		}
	}
	if startStacktraces != -1 && startValues != -1 {
		fn(p[startStacktraces:endStacktraces], p[startValues:endValues])
	}
}

// DownsampledProfileRow is a raw DownsampledProfilesSchema row with
// column-scanning accessors analogous to ProfileRow's.
type DownsampledProfileRow parquet.Row

// ForValues invokes fn with the span of present sample Value entries, if any.
func (p DownsampledProfileRow) ForValues(fn func([]parquet.Value)) {
	start := -1
	var i int
	for i = 0; i < len(p); i++ {
		col := p[i].Column()
		if col == downsampledValueColIndex && p[i].DefinitionLevel() == 1 {
			if start == -1 {
				start = i
			}
		}
		if col > downsampledValueColIndex {
			break
		}
	}
	if start != -1 {
		fn(p[start:i])
	}
}

// ForAnnotationValues invokes fn with the span of present annotation Value
// entries, if any.
func (p DownsampledProfileRow) ForAnnotationValues(fn func([]parquet.Value)) {
	start := -1
	var i int
	for i = 0; i < len(p); i++ {
		col := p[i].Column()
		if col == downsampledAnnotationValueColIndex && p[i].DefinitionLevel() == 1 {
			if start == -1 {
				start = i
			}
		}
		if col > downsampledAnnotationValueColIndex {
			break
		}
	}
	if start != -1 {
		fn(p[start:i])
	}
}