package v1

import (
	"bytes"
	"io"
	"strings"
	"testing"

	"github.com/google/uuid"
	"github.com/parquet-go/parquet-go"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"

	profilev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1"
)

// This test ensures that the structs that are stored and the used schema matches
func TestSchemaMatch(t *testing.T) {
	// TODO: Unfortunately the upstream schema doesn't correctly produce a
	// schema of a List of a struct pointer. This replaces this in the schema
	// comparison, because this has no effect on our construct/reconstruct code
	// we can simply replace the string in the schema.
	profilesStructSchema := strings.ReplaceAll(
		parquet.SchemaOf(&Profile{}).String(),
		"optional group element",
		"required group element",
	)

	require.Equal(t, profilesStructSchema, ProfilesSchema.String())

	// The stored struct is named storedStacktrace, while the hand-written
	// schema uses the message name Stacktrace; rename once before comparing.
	stacktracesStructSchema := parquet.SchemaOf(&storedStacktrace{})
	require.Equal(t, strings.Replace(stacktracesStructSchema.String(), "message storedStacktrace", "message Stacktrace", 1), stacktracesSchema.String())
}

// newStacktraces returns a fresh fixture set, including empty LocationIDs
// slices to exercise the empty-list encoding.
func newStacktraces() []*Stacktrace {
	return []*Stacktrace{
		{LocationIDs: []uint64{0x11}},
		{LocationIDs: []uint64{}},
		{LocationIDs: []uint64{12, 13}},
		{LocationIDs: []uint64{}},
		{LocationIDs: []uint64{14, 15}},
	}
}

// TestStacktracesRoundTrip writes the stacktrace fixtures to an in-memory
// parquet file and verifies they read back identical.
func TestStacktracesRoundTrip(t *testing.T) {
	var (
		s   = newStacktraces()
		w   = &ReadWriter[*Stacktrace, *StacktracePersister]{}
		buf bytes.Buffer
	)

	require.NoError(t, w.WriteParquetFile(&buf, s))

	sRead, err := w.ReadParquetFile(bytes.NewReader(buf.Bytes()))
	require.NoError(t, err)
	// Compare against a fresh fixture so mutation during the write path
	// cannot mask a mismatch.
	assert.Equal(t, newStacktraces(), sRead)
}

// newStrings returns string fixtures, including empty strings at both ends.
func newStrings() []string {
	return []string{
		"",
		"foo",
		"bar",
		"baz",
		"",
	}
}

// TestStringsRoundTrip round-trips plain strings through the StringPersister.
func TestStringsRoundTrip(t *testing.T) {
	var (
		s   = newStrings()
		w   = &ReadWriter[string, StringPersister]{}
		buf bytes.Buffer
	)

	require.NoError(t, w.WriteParquetFile(&buf, s))

	sRead, err := w.ReadParquetFile(bytes.NewReader(buf.Bytes()))
	require.NoError(t, err)
	assert.Equal(t, newStrings(), sRead)
}

// newProfiles returns profile fixtures covering repeated IDs/timestamps,
// samples with and without labels, empty comment lists, and both empty and
// populated annotations.
func newProfiles() []*Profile {
	return []*Profile{
		{
			ID:          uuid.MustParse("00000000-0000-0000-0000-000000000001"),
			TimeNanos:   1001,
			SeriesIndex: 0xaa,
			Samples: []*Sample{
				{
					StacktraceID: 0xba,
					Value:        0xca,
					Labels:       []*profilev1.Label{},
				},
				{
					StacktraceID: 0xbb,
					Value:        0xca,
					Labels: []*profilev1.Label{
						{Key: 0xda, Str: 0xea},
					},
				},
			},
			Comments:    []int64{},
			Annotations: []*Annotation{},
		},
		{
			ID:          uuid.MustParse("00000000-0000-0000-0000-000000000001"),
			TimeNanos:   1001,
			SeriesIndex: 0xab,
			Samples: []*Sample{
				{
					StacktraceID: 0xba,
					Value:        0xcc,
					Labels:       []*profilev1.Label{},
				},
				{
					StacktraceID: 0xbb,
					Value:        0xcc,
					Labels: []*profilev1.Label{
						{Key: 0xda, Str: 0xea},
					},
				},
			},
			Comments:    []int64{},
			Annotations: []*Annotation{},
		},
		{
			ID:          uuid.MustParse("00000000-0000-0000-0000-000000000002"),
			SeriesIndex: 0xab,
			TimeNanos:   1002,
			Samples: []*Sample{
				{
					StacktraceID: 0xbc,
					Value:        0xcd,
					Labels:       []*profilev1.Label{},
				},
			},
			Comments:    []int64{},
			Annotations: []*Annotation{{Key: "key", Value: "test annotation"}},
		},
		{
			ID:          uuid.MustParse("00000000-0000-0000-0000-000000000002"),
			SeriesIndex: 0xac,
			TimeNanos:   1002,
			Samples: []*Sample{
				{
					StacktraceID: 0xbc,
					Value:        0xce,
					Labels:       []*profilev1.Label{},
				},
			},
			Comments:    []int64{},
			Annotations: []*Annotation{},
		},
	}
}

// TestProfilesRoundTrip round-trips the profile fixtures through the
// ProfilePersister.
func TestProfilesRoundTrip(t *testing.T) {
	var (
		p   = newProfiles()
		w   = &ReadWriter[*Profile, *ProfilePersister]{}
		buf bytes.Buffer
	)

	require.NoError(t, w.WriteParquetFile(&buf, p))

	sRead, err := w.ReadParquetFile(bytes.NewReader(buf.Bytes()))
	require.NoError(t, err)
	assert.Equal(t, newProfiles(), sRead)
}

// TestLocationsRoundTrip verifies that locations written with either the raw
// pprof persister or the in-memory persister read back equal, and in
// particular that nil Line slices are normalized to empty slices on read.
func TestLocationsRoundTrip(t *testing.T) {
	raw := []*profilev1.Location{
		{
			Id:        8,
			Address:   9,
			MappingId: 10,
			Line: []*profilev1.Line{
				{
					FunctionId: 11,
					Line:       12,
				},
				{
					FunctionId: 13,
					Line:       14,
				},
			},
			IsFolded: true,
		},
		{
			Id:        1,
			Address:   2,
			MappingId: 3,
			Line: []*profilev1.Line{
				{
					FunctionId: 4,
					Line:       5,
				},
				{
					FunctionId: 6,
					Line:       7,
				},
			},
			IsFolded: false,
		},
		{
			Id:        10,
			Address:   11,
			MappingId: 12,
			// both pprofLocationPersister and LocationPersister deserialize as empty slice, not nil
			Line:     nil,
			IsFolded: false,
		},
		{
			Id:        10,
			Address:   11,
			MappingId: 12,
			Line:      make([]*profilev1.Line, 0),
			IsFolded:  false,
		},
	}

	mem := []InMemoryLocation{
		{
			Id:        8,
			Address:   9,
			MappingId: 10,
			Line: []InMemoryLine{
				{
					FunctionId: 11,
					Line:       12,
				},
				{
					FunctionId: 13,
					Line:       14,
				},
			},
			IsFolded: true,
		},
		{
			Id:        1,
			Address:   2,
			MappingId: 3,
			Line: []InMemoryLine{
				{
					FunctionId: 4,
					Line:       5,
				},
				{
					FunctionId: 6,
					Line:       7,
				},
			},
			IsFolded: false,
		},
		{
			Id:        10,
			Address:   11,
			MappingId: 12,
			// both pprofLocationPersister and LocationPersister deserialize as empty slice, not nil
			Line:     nil,
			IsFolded: false,
		},
		{
			Id:        10,
			Address:   11,
			MappingId: 12,
			Line:      make([]InMemoryLine, 0),
			IsFolded:  false,
		},
	}

	// expectedMem mirrors mem with nil Line slices replaced by empty slices,
	// matching what the persisters produce on read.
	expectedMem := func() []InMemoryLocation {
		res := make([]InMemoryLocation, len(mem))
		for i, loc := range mem {
			if loc.Line == nil {
				loc.Line = make([]InMemoryLine, 0)
			}
			res[i] = loc
		}
		return res
	}

	// expectedRaw deep-clones raw (CloneVT) and applies the same nil->empty
	// normalization, so the originals stay untouched for the write calls.
	expectedRaw := func() []*profilev1.Location {
		res := make([]*profilev1.Location, len(raw))
		for i, loc := range raw {
			cloned := loc.CloneVT()
			if cloned.Line == nil {
				cloned.Line = make([]*profilev1.Line, 0)
			}
			res[i] = cloned
		}
		return res
	}

	// Cross round-trip: write with the pprof persister, read with the
	// in-memory persister.
	var buf bytes.Buffer
	require.NoError(t, new(ReadWriter[*profilev1.Location, pprofLocationPersister]).WriteParquetFile(&buf, raw))
	actualMem, err := new(ReadWriter[InMemoryLocation, LocationPersister]).ReadParquetFile(bytes.NewReader(buf.Bytes()))
	require.NoError(t, err)
	assert.Equal(t, expectedMem(), actualMem)

	// In-memory persister both ways.
	buf.Reset()
	require.NoError(t, new(ReadWriter[InMemoryLocation, LocationPersister]).WriteParquetFile(&buf, mem))
	actualMem, err = new(ReadWriter[InMemoryLocation, LocationPersister]).ReadParquetFile(bytes.NewReader(buf.Bytes()))
	require.NoError(t, err)
	assert.Equal(t, expectedMem(), actualMem)

	// pprof persister both ways.
	buf.Reset()
	require.NoError(t, new(ReadWriter[*profilev1.Location, pprofLocationPersister]).WriteParquetFile(&buf, raw))
	actualRaw, err := new(ReadWriter[*profilev1.Location, pprofLocationPersister]).ReadParquetFile(bytes.NewReader(buf.Bytes()))
	require.NoError(t, err)
	assert.Equal(t, expectedRaw(), actualRaw)
}

// protoLocationsSchema is the parquet schema derived from the raw pprof
// Location message, shared by pprofLocationPersister.
var protoLocationsSchema = parquet.SchemaOf(&profilev1.Location{})

// pprofLocationPersister is a test-only Persister that stores raw
// profilev1.Location messages using their derived parquet schema.
type pprofLocationPersister struct{}

func (pprofLocationPersister) Name() string { return "locations" }

func (pprofLocationPersister) Schema() *parquet.Schema { return protoLocationsSchema }

func (pprofLocationPersister) Deconstruct(row parquet.Row, loc *profilev1.Location) parquet.Row {
	row = protoLocationsSchema.Deconstruct(row, loc)
	return row
}

func (pprofLocationPersister) Reconstruct(row parquet.Row) (*profilev1.Location, error) {
	var loc profilev1.Location
	if err := protoLocationsSchema.Reconstruct(&loc, row); err != nil {
		return nil, err
	}
	return &loc, nil
}

// TestFunctionsRoundTrip verifies that functions written with the raw pprof
// persister read back as the equivalent in-memory representation, and that
// the in-memory persister round-trips on its own.
func TestFunctionsRoundTrip(t *testing.T) {
	raw := []*profilev1.Function{
		{
			Id:         6,
			Name:       7,
			SystemName: 8,
			Filename:   9,
			StartLine:  10,
		},
		{
			Id:         1,
			Name:       2,
			SystemName: 3,
			Filename:   4,
			StartLine:  5,
		},
	}

	mem := []InMemoryFunction{
		{
			Id:         6,
			Name:       7,
			SystemName: 8,
			Filename:   9,
			StartLine:  10,
		},
		{
			Id:         1,
			Name:       2,
			SystemName: 3,
			Filename:   4,
			StartLine:  5,
		},
	}

	var buf bytes.Buffer
	require.NoError(t, new(ReadWriter[*profilev1.Function, *pprofFunctionPersister]).WriteParquetFile(&buf, raw))
	actual, err := new(ReadWriter[InMemoryFunction, FunctionPersister]).ReadParquetFile(bytes.NewReader(buf.Bytes()))
	require.NoError(t, err)
	assert.Equal(t, mem, actual)

	buf.Reset()
	require.NoError(t, new(ReadWriter[InMemoryFunction, FunctionPersister]).WriteParquetFile(&buf, mem))
	actual, err = new(ReadWriter[InMemoryFunction, FunctionPersister]).ReadParquetFile(bytes.NewReader(buf.Bytes()))
	require.NoError(t, err)
	assert.Equal(t, mem, actual)
}

// protoFunctionSchema is the parquet schema derived from the raw pprof
// Function message, shared by pprofFunctionPersister.
var protoFunctionSchema = parquet.SchemaOf(&profilev1.Function{})

// pprofFunctionPersister is a test-only Persister that stores raw
// profilev1.Function messages using their derived parquet schema.
type pprofFunctionPersister struct{}

func (*pprofFunctionPersister) Name() string { return "functions" }

func (*pprofFunctionPersister) Schema() *parquet.Schema { return protoFunctionSchema }

func (*pprofFunctionPersister) Deconstruct(row parquet.Row, loc *profilev1.Function) parquet.Row {
	row = protoFunctionSchema.Deconstruct(row, loc)
	return row
}

func (*pprofFunctionPersister) Reconstruct(row parquet.Row) (*profilev1.Function, error) {
	var fn profilev1.Function
	if err := protoFunctionSchema.Reconstruct(&fn, row); err != nil {
		return nil, err
	}
	return &fn, nil
}

// TestMappingsRoundTrip verifies that mappings written with the raw pprof
// persister read back as the equivalent in-memory representation.
func TestMappingsRoundTrip(t *testing.T) {
	raw := []*profilev1.Mapping{
		{
			Id:              7,
			MemoryStart:     8,
			MemoryLimit:     9,
			FileOffset:      10,
			Filename:        11,
			BuildId:         12,
			HasFunctions:    true,
			HasFilenames:    false,
			HasLineNumbers:  true,
			HasInlineFrames: false,
		},
		{
			Id:              1,
			MemoryStart:     2,
			MemoryLimit:     3,
			FileOffset:      4,
			Filename:        5,
			BuildId:         6,
			HasFunctions:    false,
			HasFilenames:    true,
			HasLineNumbers:  false,
			HasInlineFrames: true,
		},
	}

	mem := []InMemoryMapping{
		{
			Id:              7,
			MemoryStart:     8,
			MemoryLimit:     9,
			FileOffset:      10,
			Filename:        11,
			BuildId:         12,
			HasFunctions:    true,
			HasFilenames:    false,
			HasLineNumbers:  true,
			HasInlineFrames: false,
		},
		{
			Id:              1,
			MemoryStart:     2,
			MemoryLimit:     3,
			FileOffset:      4,
			Filename:        5,
			BuildId:         6,
			HasFunctions:    false,
			HasFilenames:    true,
			HasLineNumbers:  false,
			HasInlineFrames: true,
		},
	}

	var buf bytes.Buffer
	require.NoError(t, new(ReadWriter[*profilev1.Mapping, *pprofMappingPersister]).WriteParquetFile(&buf, raw))
	actual, err := new(ReadWriter[InMemoryMapping, MappingPersister]).ReadParquetFile(bytes.NewReader(buf.Bytes()))
	require.NoError(t, err)
	assert.Equal(t, mem, actual)

	// NOTE(review): the in-memory write round-trip below is disabled,
	// presumably because the pointer-typed instantiation does not satisfy the
	// Persister constraint — confirm before re-enabling.
	// buf.Reset()
	// require.NoError(t, new(ReadWriter[*InMemoryMapping, *MappingPersister]).WriteParquetFile(&buf, mem))
	// actual, err = new(ReadWriter[*InMemoryMapping, *MappingPersister]).ReadParquetFile(bytes.NewReader(buf.Bytes()))
	// require.NoError(t, err)
	// assert.Equal(t, mem, actual)
}

// protoMappingSchema is the parquet schema derived from the raw pprof
// Mapping message, shared by pprofMappingPersister.
var protoMappingSchema = parquet.SchemaOf(&profilev1.Mapping{})

// pprofMappingPersister is a test-only Persister that stores raw
// profilev1.Mapping messages using their derived parquet schema.
type pprofMappingPersister struct{}

func (*pprofMappingPersister) Name() string { return "mappings" }

func (*pprofMappingPersister) Schema() *parquet.Schema { return protoMappingSchema }

func (*pprofMappingPersister) Deconstruct(row parquet.Row, loc *profilev1.Mapping) parquet.Row {
	row = protoMappingSchema.Deconstruct(row, loc)
	return row
}
486 func (*pprofMappingPersister) Reconstruct(row parquet.Row) (*profilev1.Mapping, error) { 487 var m profilev1.Mapping 488 if err := protoMappingSchema.Reconstruct(&m, row); err != nil { 489 return nil, err 490 } 491 return &m, nil 492 } 493 494 type ReadWriter[T any, P Persister[T]] struct{} 495 496 func (r *ReadWriter[T, P]) WriteParquetFile(file io.Writer, elements []T) error { 497 var ( 498 persister P 499 rows = make([]parquet.Row, len(elements)) 500 ) 501 502 buffer := parquet.NewBuffer(persister.Schema()) 503 504 for pos := range rows { 505 rows[pos] = persister.Deconstruct(rows[pos], elements[pos]) 506 } 507 508 if _, err := buffer.WriteRows(rows); err != nil { 509 return err 510 } 511 512 writer := parquet.NewWriter(file, persister.Schema()) 513 if _, err := parquet.CopyRows(writer, buffer.Rows()); err != nil { 514 return err 515 } 516 517 return writer.Close() 518 } 519 520 func (*ReadWriter[T, P]) ReadParquetFile(file io.ReaderAt) ([]T, error) { 521 var ( 522 persister P 523 reader = parquet.NewReader(file, persister.Schema()) 524 ) 525 defer reader.Close() 526 527 rows := make([]parquet.Row, reader.NumRows()) 528 if _, err := reader.ReadRows(rows); err != nil { 529 return nil, err 530 } 531 532 var ( 533 elements = make([]T, reader.NumRows()) 534 err error 535 ) 536 for pos := range elements { 537 elements[pos], err = persister.Reconstruct(rows[pos]) 538 if err != nil { 539 return nil, err 540 } 541 } 542 543 return elements, nil 544 }