github.com/grafana/pyroscope@v1.18.0/pkg/block/section_profiles_test.go (about)

     1  package block
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"io"
     7  	"testing"
     8  
     9  	"github.com/parquet-go/parquet-go"
    10  	"github.com/stretchr/testify/require"
    11  
    12  	"github.com/grafana/pyroscope/pkg/objstore/testutil"
    13  )
    14  
    15  func createParquetFile[T any](t testing.TB, f io.Writer, rows []T, rowGroups int) {
    16  	perRG := len(rows) / rowGroups
    17  
    18  	w := parquet.NewGenericWriter[T](f)
    19  	for i := 0; i < (rowGroups - 1); i++ {
    20  		_, err := w.Write(rows[0:perRG])
    21  		require.NoError(t, err)
    22  		require.NoError(t, w.Flush())
    23  		rows = rows[perRG:]
    24  	}
    25  
    26  	_, err := w.Write(rows)
    27  	require.NoError(t, err)
    28  	require.NoError(t, w.Flush())
    29  
    30  	require.NoError(t, w.Close())
    31  }
    32  
    33  func createParquetTestFile(t testing.TB, f io.Writer, count int) {
    34  	type T struct{ A int }
    35  
    36  	rows := []T{}
    37  	for i := 0; i < count; i++ {
    38  		rows = append(rows, T{i})
    39  	}
    40  
    41  	createParquetFile(t, f, rows, 4)
    42  }
    43  
    44  func validateColumnIndex(t *testing.T, pf *parquet.File, count int) {
    45  	rgs := pf.RowGroups()
    46  	require.Equal(t, 4, len(rgs))
    47  
    48  	// check last row groups column index
    49  	ci, err := rgs[3].ColumnChunks()[0].ColumnIndex()
    50  	require.NoError(t, err)
    51  
    52  	pages := ci.NumPages()
    53  	require.Equal(t, int64(count-1), ci.MaxValue(pages-1).Int64())
    54  }
    55  
    56  func Test_openParquetFile(t *testing.T) {
    57  	path := "test.parquet"
    58  	ctx := context.Background()
    59  	bucket, _ := testutil.NewFilesystemBucket(t, ctx, t.TempDir())
    60  
    61  	buf := bytes.NewBuffer(nil)
    62  	count := 100
    63  	createParquetTestFile(t, buf, count)
    64  
    65  	actualFooterSize := footerSize(buf.Bytes())
    66  
    67  	err := bucket.Upload(ctx, path, bytes.NewReader(buf.Bytes()))
    68  	require.NoError(t, err)
    69  
    70  	pathOffset := "test.offset.parquet"
    71  	require.NoError(t, bucket.Upload(ctx, pathOffset, bytes.NewReader(append(bytes.Repeat([]byte{0xab}, 16), buf.Bytes()...))))
    72  
    73  	opts := []parquet.FileOption{
    74  		parquet.SkipBloomFilters(true),
    75  	}
    76  
    77  	t.Run("withFooterSizeSmallerThanEstimate", func(t *testing.T) {
    78  		pf, err := openParquetFile(bucket, path, 0, int64(buf.Len()), actualFooterSize*2, opts...)
    79  		require.NoError(t, err)
    80  
    81  		validateColumnIndex(t, pf.File, count)
    82  	})
    83  
    84  	t.Run("withFooterSizeExactEstimate", func(t *testing.T) {
    85  		pf, err := openParquetFile(bucket, path, 0, int64(buf.Len()), actualFooterSize, opts...)
    86  		require.NoError(t, err)
    87  
    88  		validateColumnIndex(t, pf.File, count)
    89  	})
    90  	t.Run("withFooterSizeSmallerEstimate", func(t *testing.T) {
    91  		pf, err := openParquetFile(bucket, path, 0, int64(buf.Len()), 200, opts...)
    92  		require.NoError(t, err)
    93  
    94  		validateColumnIndex(t, pf.File, count)
    95  	})
    96  	t.Run("withFooterSizeVerySmall", func(t *testing.T) {
    97  		pf, err := openParquetFile(bucket, path, 0, int64(buf.Len()), 1, opts...)
    98  		require.NoError(t, err)
    99  
   100  		validateColumnIndex(t, pf.File, count)
   101  	})
   102  
   103  	t.Run("withOffsetAndFooterSmallerThanEstimate", func(t *testing.T) {
   104  		pf, err := openParquetFile(bucket, pathOffset, 16, int64(buf.Len()), actualFooterSize*2, opts...)
   105  		require.NoError(t, err)
   106  		validateColumnIndex(t, pf.File, count)
   107  	})
   108  
   109  }