github.com/grafana/pyroscope@v1.18.0/pkg/block/section_profiles_test.go (about) 1 package block 2 3 import ( 4 "bytes" 5 "context" 6 "io" 7 "testing" 8 9 "github.com/parquet-go/parquet-go" 10 "github.com/stretchr/testify/require" 11 12 "github.com/grafana/pyroscope/pkg/objstore/testutil" 13 ) 14 15 func createParquetFile[T any](t testing.TB, f io.Writer, rows []T, rowGroups int) { 16 perRG := len(rows) / rowGroups 17 18 w := parquet.NewGenericWriter[T](f) 19 for i := 0; i < (rowGroups - 1); i++ { 20 _, err := w.Write(rows[0:perRG]) 21 require.NoError(t, err) 22 require.NoError(t, w.Flush()) 23 rows = rows[perRG:] 24 } 25 26 _, err := w.Write(rows) 27 require.NoError(t, err) 28 require.NoError(t, w.Flush()) 29 30 require.NoError(t, w.Close()) 31 } 32 33 func createParquetTestFile(t testing.TB, f io.Writer, count int) { 34 type T struct{ A int } 35 36 rows := []T{} 37 for i := 0; i < count; i++ { 38 rows = append(rows, T{i}) 39 } 40 41 createParquetFile(t, f, rows, 4) 42 } 43 44 func validateColumnIndex(t *testing.T, pf *parquet.File, count int) { 45 rgs := pf.RowGroups() 46 require.Equal(t, 4, len(rgs)) 47 48 // check last row groups column index 49 ci, err := rgs[3].ColumnChunks()[0].ColumnIndex() 50 require.NoError(t, err) 51 52 pages := ci.NumPages() 53 require.Equal(t, int64(count-1), ci.MaxValue(pages-1).Int64()) 54 } 55 56 func Test_openParquetFile(t *testing.T) { 57 path := "test.parquet" 58 ctx := context.Background() 59 bucket, _ := testutil.NewFilesystemBucket(t, ctx, t.TempDir()) 60 61 buf := bytes.NewBuffer(nil) 62 count := 100 63 createParquetTestFile(t, buf, count) 64 65 actualFooterSize := footerSize(buf.Bytes()) 66 67 err := bucket.Upload(ctx, path, bytes.NewReader(buf.Bytes())) 68 require.NoError(t, err) 69 70 pathOffset := "test.offset.parquet" 71 require.NoError(t, bucket.Upload(ctx, pathOffset, bytes.NewReader(append(bytes.Repeat([]byte{0xab}, 16), buf.Bytes()...)))) 72 73 opts := []parquet.FileOption{ 74 parquet.SkipBloomFilters(true), 75 } 76 77 t.Run("withFooterSizeSmallerThanEstimate", func(t *testing.T) { 78 pf, err := openParquetFile(bucket, path, 0, int64(buf.Len()), actualFooterSize*2, opts...) 79 require.NoError(t, err) 80 81 validateColumnIndex(t, pf.File, count) 82 }) 83 84 t.Run("withFooterSizeExactEstimate", func(t *testing.T) { 85 pf, err := openParquetFile(bucket, path, 0, int64(buf.Len()), actualFooterSize, opts...) 86 require.NoError(t, err) 87 88 validateColumnIndex(t, pf.File, count) 89 }) 90 t.Run("withFooterSizeSmallerEstimate", func(t *testing.T) { 91 pf, err := openParquetFile(bucket, path, 0, int64(buf.Len()), 200, opts...) 92 require.NoError(t, err) 93 94 validateColumnIndex(t, pf.File, count) 95 }) 96 t.Run("withFooterSizeVerySmall", func(t *testing.T) { 97 pf, err := openParquetFile(bucket, path, 0, int64(buf.Len()), 1, opts...) 98 require.NoError(t, err) 99 100 validateColumnIndex(t, pf.File, count) 101 }) 102 103 t.Run("withOffsetAndFooterSmallerThanEstimate", func(t *testing.T) { 104 pf, err := openParquetFile(bucket, pathOffset, 16, int64(buf.Len()), actualFooterSize*2, opts...) 105 require.NoError(t, err) 106 validateColumnIndex(t, pf.File, count) 107 }) 108 109 }