github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/query/iters_test.go (about) 1 package query 2 3 import ( 4 "bytes" 5 "context" 6 "fmt" 7 "math" 8 "math/rand" 9 "os" 10 "testing" 11 12 "github.com/parquet-go/parquet-go" 13 "github.com/prometheus/client_golang/prometheus" 14 "github.com/prometheus/client_golang/prometheus/testutil" 15 "github.com/stretchr/testify/assert" 16 "github.com/stretchr/testify/require" 17 18 "github.com/grafana/pyroscope/pkg/iter" 19 ) 20 21 type makeTestIterFn func(pf *parquet.File, idx int, filter Predicate, selectAs string) Iterator 22 23 var iterTestCases = []struct { 24 name string 25 makeIter makeTestIterFn 26 }{ 27 {"sync", func(pf *parquet.File, idx int, filter Predicate, selectAs string) Iterator { 28 return NewSyncIterator(context.TODO(), pf.RowGroups(), idx, selectAs, 1000, filter, selectAs) 29 }}, 30 } 31 32 // TestNext compares the unrolled Next() with the original nextSlow() to 33 // prevent drift 34 func TestNext(t *testing.T) { 35 rn1 := RowNumber{0, 0, 0, 0, 0, 0} 36 rn2 := RowNumber{0, 0, 0, 0, 0, 0} 37 38 for i := 0; i < 1000; i++ { 39 r := rand.Intn(6) 40 d := rand.Intn(6) 41 42 rn1.Next(r, d) 43 rn2.nextSlow(r, d) 44 45 require.Equal(t, rn1, rn2) 46 } 47 } 48 49 func TestRowNumber(t *testing.T) { 50 tr := EmptyRowNumber() 51 require.Equal(t, RowNumber{-1, -1, -1, -1, -1, -1}, tr) 52 53 steps := []struct { 54 repetitionLevel int 55 definitionLevel int 56 expected RowNumber 57 }{ 58 // Name.Language.Country examples from the Dremel whitepaper 59 {0, 3, RowNumber{0, 0, 0, 0, -1, -1}}, 60 {2, 2, RowNumber{0, 0, 1, -1, -1, -1}}, 61 {1, 1, RowNumber{0, 1, -1, -1, -1, -1}}, 62 {1, 3, RowNumber{0, 2, 0, 0, -1, -1}}, 63 {0, 1, RowNumber{1, 0, -1, -1, -1, -1}}, 64 } 65 66 for _, step := range steps { 67 tr.Next(step.repetitionLevel, step.definitionLevel) 68 require.Equal(t, step.expected, tr) 69 } 70 } 71 72 func TestCompareRowNumbers(t *testing.T) { 73 testCases := []struct { 74 a, b RowNumber 75 expected int 76 }{ 77 {RowNumber{-1}, RowNumber{0}, -1}, 78 {RowNumber{0}, RowNumber{0}, 0}, 79 {RowNumber{1}, RowNumber{0}, 1}, 80 81 {RowNumber{0, 1}, RowNumber{0, 2}, -1}, 82 {RowNumber{0, 2}, RowNumber{0, 1}, 1}, 83 } 84 85 for _, tc := range testCases { 86 require.Equal(t, tc.expected, CompareRowNumbers(MaxDefinitionLevel, tc.a, tc.b)) 87 } 88 } 89 90 func TestRowNumberPreceding(t *testing.T) { 91 testCases := []struct { 92 start, preceding RowNumber 93 }{ 94 {RowNumber{1000, -1, -1, -1, -1, -1}, RowNumber{999, -1, -1, -1, -1, -1}}, 95 {RowNumber{1000, 0, 0, 0, 0, 0}, RowNumber{999, math.MaxInt64, math.MaxInt64, math.MaxInt64, math.MaxInt64, math.MaxInt64}}, 96 } 97 98 for _, tc := range testCases { 99 require.Equal(t, tc.preceding, tc.start.Preceding()) 100 } 101 } 102 103 func TestColumnIterator(t *testing.T) { 104 for _, tc := range iterTestCases { 105 t.Run(tc.name, func(t *testing.T) { 106 testColumnIterator(t, tc.makeIter) 107 }) 108 } 109 } 110 111 func testColumnIterator(t *testing.T, makeIter makeTestIterFn) { 112 count := 100_000 113 pf := createTestFile(t, count) 114 115 idx, _ := GetColumnIndexByPath(pf.Root(), "A") 116 iter := makeIter(pf, idx, nil, "A") 117 defer iter.Close() 118 119 for i := 0; i < count; i++ { 120 require.True(t, iter.Next()) 121 res := iter.At() 122 require.NotNil(t, res, "i=%d", i) 123 require.Equal(t, RowNumber{int64(i), -1, -1, -1, -1, -1}, res.RowNumber) 124 require.Equal(t, int64(i), res.ToMap()["A"][0].Int64()) 125 } 126 127 require.False(t, iter.Next()) 128 require.NoError(t, iter.Err()) 129 } 130 131 func TestColumnIteratorSeek(t *testing.T) { 132 for _, tc := range iterTestCases { 133 t.Run(tc.name, func(t *testing.T) { 134 testColumnIteratorSeek(t, tc.makeIter) 135 }) 136 } 137 } 138 139 func testColumnIteratorSeek(t *testing.T, makeIter makeTestIterFn) { 140 count := 10_000 141 pf := createTestFile(t, count) 142 143 idx, _ := GetColumnIndexByPath(pf.Root(), "A") 144 iter := makeIter(pf, idx, nil, "A") 145 defer iter.Close() 146 147 seekTos := []int64{ 148 100, 149 1234, 150 4567, 151 5000, 152 7890, 153 } 154 155 for _, seekTo := range seekTos { 156 rn := EmptyRowNumber() 157 rn[0] = seekTo 158 require.True(t, iter.Seek(RowNumberWithDefinitionLevel{rn, 0})) 159 res := iter.At() 160 require.NotNil(t, res, "seekTo=%v", seekTo) 161 require.Equal(t, RowNumber{seekTo, -1, -1, -1, -1, -1}, res.RowNumber) 162 require.Equal(t, seekTo, res.ToMap()["A"][0].Int64()) 163 } 164 } 165 166 func TestColumnIteratorPredicate(t *testing.T) { 167 for _, tc := range iterTestCases { 168 t.Run(tc.name, func(t *testing.T) { 169 testColumnIteratorPredicate(t, tc.makeIter) 170 }) 171 } 172 } 173 174 func testColumnIteratorPredicate(t *testing.T, makeIter makeTestIterFn) { 175 count := 10_000 176 pf := createTestFile(t, count) 177 178 pred := NewIntBetweenPredicate(7001, 7003) 179 180 idx, _ := GetColumnIndexByPath(pf.Root(), "A") 181 iter := makeIter(pf, idx, pred, "A") 182 defer iter.Close() 183 184 expectedResults := []int64{ 185 7001, 186 7002, 187 7003, 188 } 189 190 for _, expectedResult := range expectedResults { 191 require.True(t, iter.Next()) 192 res := iter.At() 193 require.NotNil(t, res) 194 require.Equal(t, RowNumber{expectedResult, -1, -1, -1, -1, -1}, res.RowNumber) 195 require.Equal(t, expectedResult, res.ToMap()["A"][0].Int64()) 196 } 197 } 198 199 func TestColumnIteratorExitEarly(t *testing.T) { 200 type T struct{ A int } 201 202 rows := []T{} 203 count := 10_000 204 for i := 0; i < count; i++ { 205 rows = append(rows, T{i}) 206 } 207 208 pf := createFileWith(t, rows, 2) 209 idx, _ := GetColumnIndexByPath(pf.Root(), "A") 210 readSize := 1000 211 212 readIter := func(iter Iterator) (int, error) { 213 received := 0 214 for iter.Next() { 215 received++ 216 } 217 return received, iter.Err() 218 } 219 220 t.Run("cancelledEarly", func(t *testing.T) { 221 // Cancel before iterating 222 ctx, cancel := context.WithCancel(context.TODO()) 223 cancel() 224 iter := NewSyncIterator(ctx, pf.RowGroups(), idx, "", readSize, nil, "A") 225 count, err := readIter(iter) 226 require.ErrorContains(t, err, "context canceled") 227 require.Equal(t, 0, count) 228 }) 229 230 t.Run("cancelledPartial", func(t *testing.T) { 231 ctx, cancel := context.WithCancel(context.TODO()) 232 iter := NewSyncIterator(ctx, pf.RowGroups(), idx, "", readSize, nil, "A") 233 234 // Read some results 235 require.True(t, iter.Next()) 236 237 // Then cancel 238 cancel() 239 240 // Read again = context cancelled 241 _, err := readIter(iter) 242 require.ErrorContains(t, err, "context canceled") 243 }) 244 245 t.Run("closedEarly", func(t *testing.T) { 246 // Close before iterating 247 iter := NewSyncIterator(context.TODO(), pf.RowGroups(), idx, "", readSize, nil, "A") 248 iter.Close() 249 count, err := readIter(iter) 250 require.ErrorContains(t, err, "context canceled") 251 require.Equal(t, 0, count) 252 }) 253 254 t.Run("closedPartial", func(t *testing.T) { 255 iter := NewSyncIterator(context.TODO(), pf.RowGroups(), idx, "", readSize, nil, "A") 256 257 // Read some results 258 require.True(t, iter.Next()) 259 260 // Then close 261 iter.Close() 262 263 // Read again = should close early 264 res2, err := readIter(iter) 265 require.ErrorContains(t, err, "context canceled") 266 require.Less(t, readSize+res2, count) 267 }) 268 } 269 270 func BenchmarkColumnIterator(b *testing.B) { 271 for _, tc := range iterTestCases { 272 b.Run(tc.name, func(b *testing.B) { 273 benchmarkColumnIterator(b, tc.makeIter) 274 }) 275 } 276 } 277 278 func benchmarkColumnIterator(b *testing.B, makeIter makeTestIterFn) { 279 count := 100_000 280 pf := createTestFile(b, count) 281 282 idx, _ := GetColumnIndexByPath(pf.Root(), "A") 283 284 b.ResetTimer() 285 286 for i := 0; i < b.N; i++ { 287 iter := makeIter(pf, idx, nil, "A") 288 actualCount := 0 289 for iter.Next() { 290 actualCount++ 291 } 292 iter.Close() 293 require.Equal(b, count, actualCount) 294 // fmt.Println(actualCount) 295 } 296 } 297 298 func createTestFile(t testing.TB, count int) *parquet.File { 299 type T struct{ A int } 300 301 rows := []T{} 302 for i := 0; i < count; i++ { 303 rows = append(rows, T{i}) 304 } 305 306 pf := createFileWith(t, rows, 2) 307 return pf 308 } 309 310 func createProfileLikeFile(t testing.TB, count int) *parquet.File { 311 type T struct { 312 SeriesID uint32 313 TimeNanos int64 314 } 315 316 // every row group is ordered by serieID and then time nanos 317 // time is always increasing between rowgroups 318 319 rowGroups := 10 320 series := 8 321 322 rows := make([]T, count) 323 for i := range rows { 324 325 rowsPerRowGroup := count / rowGroups 326 seriesPerRowGroup := rowsPerRowGroup / series 327 rowGroupNum := i / rowsPerRowGroup 328 329 seriesID := uint32(i % (count / rowGroups) / (rowsPerRowGroup / series)) 330 rows[i] = T{ 331 SeriesID: seriesID, 332 TimeNanos: int64(i%seriesPerRowGroup+rowGroupNum*seriesPerRowGroup) * 1000, 333 } 334 335 } 336 337 return createFileWith[T](t, rows, rowGroups) 338 } 339 340 func createFileWith[T any](t testing.TB, rows []T, rowGroups int) *parquet.File { 341 f, err := os.CreateTemp(t.TempDir(), "data.parquet") 342 require.NoError(t, err) 343 t.Logf("Created temp file %s", f.Name()) 344 345 perRG := len(rows) / rowGroups 346 347 w := parquet.NewGenericWriter[T](f) 348 for i := 0; i < (rowGroups - 1); i++ { 349 _, err = w.Write(rows[0:perRG]) 350 require.NoError(t, err) 351 require.NoError(t, w.Flush()) 352 rows = rows[perRG:] 353 } 354 355 _, err = w.Write(rows) 356 require.NoError(t, err) 357 require.NoError(t, w.Flush()) 358 359 require.NoError(t, w.Close()) 360 361 stat, err := f.Stat() 362 require.NoError(t, err) 363 364 pf, err := parquet.OpenFile(f, stat.Size()) 365 require.NoError(t, err) 366 367 return pf 368 } 369 370 func TestBinaryJoinIterator(t *testing.T) { 371 rowCount := 1600 372 pf := createProfileLikeFile(t, rowCount) 373 374 for _, tc := range []struct { 375 name string 376 seriesPredicate Predicate 377 seriesPageReads int 378 timePredicate Predicate 379 timePageReads int 380 expectedResultCount int 381 }{ 382 { 383 name: "no predicate", 384 expectedResultCount: rowCount, // expect everything 385 seriesPageReads: 10, 386 timePageReads: 10, 387 }, 388 { 389 name: "one series ID", 390 expectedResultCount: rowCount / 8, // expect an eight of the rows 391 seriesPredicate: NewMapPredicate(map[int64]struct{}{0: {}}), 392 seriesPageReads: 10, 393 timePageReads: 10, 394 }, 395 { 396 name: "two series IDs", 397 expectedResultCount: rowCount / 8 * 2, // expect two eights of the rows 398 seriesPredicate: NewMapPredicate(map[int64]struct{}{0: {}, 1: {}}), 399 seriesPageReads: 10, 400 timePageReads: 10, 401 }, 402 { 403 name: "missing series", 404 expectedResultCount: 0, 405 seriesPredicate: NewMapPredicate(map[int64]struct{}{10: {}}), 406 }, 407 { 408 name: "first two time stamps each", 409 expectedResultCount: 2 * 8, // expect two profiles for each series 410 timePredicate: NewIntBetweenPredicate(0, 1000), 411 seriesPageReads: 1, 412 timePageReads: 1, 413 }, 414 { 415 name: "time before results", 416 expectedResultCount: 0, 417 timePredicate: NewIntBetweenPredicate(-10, -1), 418 seriesPageReads: 1, 419 timePageReads: 0, 420 }, 421 { 422 name: "time after results", 423 expectedResultCount: 0, 424 timePredicate: NewIntBetweenPredicate(200000, 20001000), 425 seriesPredicate: NewMapPredicate(map[int64]struct{}{0: {}, 1: {}}), 426 seriesPageReads: 1, 427 timePageReads: 0, 428 }, 429 } { 430 t.Run(tc.name, func(t *testing.T) { 431 ctx, cancel := context.WithCancel(context.Background()) 432 defer cancel() 433 434 reg := prometheus.NewRegistry() 435 metrics := NewMetrics(reg) 436 metrics.pageReadsTotal.WithLabelValues("ts", "SeriesId").Add(0) 437 metrics.pageReadsTotal.WithLabelValues("ts", "TimeNanos").Add(0) 438 ctx = AddMetricsToContext(ctx, metrics) 439 440 seriesIt := NewSyncIterator(ctx, pf.RowGroups(), 0, "SeriesId", 1000, tc.seriesPredicate, "SeriesId") 441 timeIt := NewSyncIterator(ctx, pf.RowGroups(), 1, "TimeNanos", 1000, tc.timePredicate, "TimeNanos") 442 443 it := NewBinaryJoinIterator( 444 0, 445 seriesIt, 446 timeIt, 447 ) 448 449 results := 0 450 for it.Next() { 451 results++ 452 } 453 require.NoError(t, it.Err()) 454 455 require.NoError(t, it.Close()) 456 457 require.Equal(t, tc.expectedResultCount, results) 458 459 require.NoError(t, testutil.GatherAndCompare(reg, bytes.NewReader([]byte(fmt.Sprintf( 460 ` 461 # HELP pyroscopedb_page_reads_total Total number of pages read while querying 462 # TYPE pyroscopedb_page_reads_total counter 463 pyroscopedb_page_reads_total{column="SeriesId",table="ts"} %d 464 pyroscopedb_page_reads_total{column="TimeNanos",table="ts"} %d 465 `, tc.seriesPageReads, tc.timePageReads))), "pyroscopedb_page_reads_total")) 466 }) 467 } 468 } 469 470 type rowGetter int64 471 472 func (r rowGetter) RowNumber() int64 { 473 return int64(r) 474 } 475 476 func TestRowNumberIterator(t *testing.T) { 477 rows := []rowGetter{1, 2, 3, 50, 100, 102, 200} 478 479 t.Run("iterate over all", func(t *testing.T) { 480 it := NewRowNumberIterator(iter.NewSliceIterator(rows)) 481 result := []int64{} 482 for it.Next() { 483 result = append(result, it.At().RowNumber[0]) 484 } 485 require.NoError(t, it.Err()) 486 assert.Equal(t, []int64{1, 2, 3, 50, 100, 102, 200}, result) 487 }) 488 489 t.Run("seek into iter", func(t *testing.T) { 490 it := NewRowNumberIterator(iter.NewSliceIterator(rows)) 491 492 to := EmptyRowNumber() 493 to[0] = 100 494 require.True(t, it.Seek(RowNumberWithDefinitionLevel{RowNumber: to})) 495 result := []int64{it.At().RowNumber[0]} 496 for it.Next() { 497 result = append(result, it.At().RowNumber[0]) 498 } 499 require.NoError(t, it.Err()) 500 assert.Equal(t, []int64{100, 102, 200}, result) 501 }) 502 503 t.Run("seek to non existing value", func(t *testing.T) { 504 it := NewRowNumberIterator(iter.NewSliceIterator(rows)) 505 to := EmptyRowNumber() 506 to[0] = 10 507 require.True(t, it.Seek(RowNumberWithDefinitionLevel{RowNumber: to})) 508 result := []int64{it.At().RowNumber[0]} 509 for it.Next() { 510 result = append(result, it.At().RowNumber[0]) 511 } 512 require.NoError(t, it.Err()) 513 assert.Equal(t, []int64{50, 100, 102, 200}, result) 514 }) 515 516 t.Run("seek beyond rows", func(t *testing.T) { 517 it := NewRowNumberIterator(iter.NewSliceIterator(rows)) 518 to := EmptyRowNumber() 519 to[0] = 300 520 require.False(t, it.Seek(RowNumberWithDefinitionLevel{RowNumber: to})) 521 require.NoError(t, it.Err()) 522 }) 523 524 t.Run("underlying iterator not ordered", func(t *testing.T) { 525 it := NewRowNumberIterator(iter.NewSliceIterator(append(rows, 300, 210, 500))) 526 for it.Next() { 527 } 528 require.ErrorContains(t, it.Err(), "is not sorted") 529 }) 530 }