github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/downsample/downsample_test.go (about)

     1  package downsample
     2  
import (
	"errors"
	"io"
	"os"
	"path/filepath"
	"testing"

	"github.com/go-kit/log"
	"github.com/parquet-go/parquet-go"
	"github.com/prometheus/common/model"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"

	phlareparquet "github.com/grafana/pyroscope/pkg/parquet"
	schemav1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1"
	schemav1testhelper "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1/testhelper"
	"github.com/grafana/pyroscope/pkg/pprof/testhelper"
)
    19  
    20  func TestDownsampler_ProfileCounts(t *testing.T) {
    21  	outDir := t.TempDir()
    22  	d, err := NewDownsampler(outDir, log.NewNopLogger())
    23  	require.NoError(t, err)
    24  
    25  	f, err := os.Open("../testdata/01HHYG6245NWHZWVP27V8WJRT7/profiles.parquet")
    26  	require.NoError(t, err)
    27  	defer func() {
    28  		require.NoError(t, f.Close())
    29  	}()
    30  
    31  	reader := parquet.NewReader(f, schemav1.ProfilesSchema)
    32  	rows, err := phlareparquet.ReadAllWithBufferSize(reader, 1024)
    33  	require.NoError(t, err)
    34  
    35  	for _, row := range rows {
    36  		err = d.AddRow(schemav1.ProfileRow(row), 1)
    37  		require.NoError(t, err)
    38  	}
    39  
    40  	err = d.Close()
    41  	require.NoError(t, err)
    42  
    43  	verifyProfileCount(t, outDir, "profiles_5m_sum.parquet", 1869)
    44  	verifyProfileCount(t, outDir, "profiles_1h_sum.parquet", 50)
    45  }
    46  
    47  func TestDownsampler_Aggregation(t *testing.T) {
    48  	profiles := make([]schemav1.InMemoryProfile, 0)
    49  	builder := testhelper.NewProfileBuilder(1703853310000000000).CPUProfile() // 2023-12-29T12:35:10Z
    50  	builder.ForStacktraceString("a", "b", "c").AddSamples(30)
    51  	builder.ForStacktraceString("a", "b", "c", "d").AddSamples(20)
    52  	builder.WithAnnotations("test annotation 1")
    53  	batch, _ := schemav1testhelper.NewProfileSchema(builder, "cpu")
    54  	profiles = append(profiles, batch...)
    55  
    56  	builder = testhelper.NewProfileBuilder(1703853559000000000).CPUProfile() // 2023-12-29T12:39:19Z
    57  	builder.ForStacktraceString("a", "b", "c").AddSamples(40)
    58  	builder.ForStacktraceString("a", "b", "c", "d").AddSamples(30)
    59  	builder.ForStacktraceString("a", "b", "c", "d", "e").AddSamples(20)
    60  	batch, _ = schemav1testhelper.NewProfileSchema(builder, "cpu")
    61  	profiles = append(profiles, batch...)
    62  
    63  	builder = testhelper.NewProfileBuilder(1703854209000000000).CPUProfile() // 2023-12-29T12:50:09Z
    64  	builder.ForStacktraceString("a", "b", "c").AddSamples(40)
    65  	builder.ForStacktraceString("a", "b", "c", "d").AddSamples(30)
    66  	batch, _ = schemav1testhelper.NewProfileSchema(builder, "cpu")
    67  	profiles = append(profiles, batch...)
    68  
    69  	builder = testhelper.NewProfileBuilder(1703858409000000000).CPUProfile() // 2023-12-29T14:00:09Z
    70  	builder.ForStacktraceString("a", "b", "c").AddSamples(30)
    71  	builder.ForStacktraceString("a", "b", "c", "d").AddSamples(20)
    72  	builder.WithAnnotations("test annotation 2")
    73  	batch, _ = schemav1testhelper.NewProfileSchema(builder, "cpu")
    74  	profiles = append(profiles, batch...)
    75  
    76  	reader := schemav1.NewInMemoryProfilesRowReader(profiles)
    77  	rows, err := phlareparquet.ReadAllWithBufferSize(reader, 1024)
    78  	require.NoError(t, err)
    79  
    80  	outDir := t.TempDir()
    81  	d, err := NewDownsampler(outDir, log.NewNopLogger())
    82  	require.NoError(t, err)
    83  
    84  	for _, row := range rows {
    85  		err = d.AddRow(schemav1.ProfileRow(row), 1)
    86  		require.NoError(t, err)
    87  	}
    88  
    89  	err = d.Close()
    90  	require.NoError(t, err)
    91  
    92  	downsampledRows := readDownsampledRows(t, filepath.Join(outDir, "profiles_5m_sum.parquet"), 3)
    93  
    94  	schemav1.DownsampledProfileRow(downsampledRows[0]).ForValues(func(values []parquet.Value) {
    95  		require.Equal(t, 3, len(values))
    96  		require.Equal(t, int64(70), values[0].Int64()) // a, b, c
    97  		require.Equal(t, int64(50), values[1].Int64()) // a, b, c, d
    98  		require.Equal(t, int64(20), values[2].Int64()) // a, b, c, d, e
    99  	})
   100  
   101  	annotations := make([]string, 0)
   102  	schemav1.DownsampledProfileRow(downsampledRows[0]).ForAnnotationValues(func(values []parquet.Value) {
   103  		annotations = append(annotations, values[0].String())
   104  	})
   105  	require.Equal(t, 1, len(annotations))
   106  	require.Equal(t, "test annotation 1", annotations[0])
   107  
   108  	downsampledRows = readDownsampledRows(t, filepath.Join(outDir, "profiles_1h_sum.parquet"), 2)
   109  
   110  	schemav1.DownsampledProfileRow(downsampledRows[0]).ForValues(func(values []parquet.Value) {
   111  		require.Equal(t, 3, len(values))
   112  		require.Equal(t, int64(110), values[0].Int64()) // a, b, c
   113  		require.Equal(t, int64(80), values[1].Int64())  // a, b, c, d
   114  		require.Equal(t, int64(20), values[2].Int64())  // a, b, c, d, e
   115  	})
   116  
   117  	annotations = make([]string, 0)
   118  	schemav1.DownsampledProfileRow(downsampledRows[0]).ForAnnotationValues(func(values []parquet.Value) {
   119  		annotations = append(annotations, values[0].String())
   120  	})
   121  	schemav1.DownsampledProfileRow(downsampledRows[1]).ForAnnotationValues(func(values []parquet.Value) {
   122  		annotations = append(annotations, values[0].String())
   123  	})
   124  	require.Equal(t, 2, len(annotations))
   125  	require.Equal(t, "test annotation 1", annotations[0])
   126  	require.Equal(t, "test annotation 2", annotations[1])
   127  }
   128  
   129  func TestDownsampler_VaryingFingerprints(t *testing.T) {
   130  	profiles := make([]schemav1.InMemoryProfile, 0)
   131  	for i := 0; i < 5; i++ {
   132  		builder := testhelper.NewProfileBuilder(1703853310000000000).CPUProfile() // 2023-12-29T12:35:10Z
   133  		builder.ForStacktraceString("a", "b", "c").AddSamples(30)
   134  		batch, _ := schemav1testhelper.NewProfileSchema(builder, "cpu")
   135  		profiles = append(profiles, batch...)
   136  	}
   137  
   138  	reader := schemav1.NewInMemoryProfilesRowReader(profiles)
   139  	rows, err := phlareparquet.ReadAllWithBufferSize(reader, 1024)
   140  	require.NoError(t, err)
   141  
   142  	outDir := t.TempDir()
   143  	d, err := NewDownsampler(outDir, log.NewNopLogger())
   144  	require.NoError(t, err)
   145  
   146  	for i, row := range rows {
   147  		err = d.AddRow(schemav1.ProfileRow(row), model.Fingerprint(i))
   148  		require.NoError(t, err)
   149  	}
   150  
   151  	err = d.Close()
   152  	require.NoError(t, err)
   153  
   154  	verifyProfileCount(t, outDir, "profiles_5m_sum.parquet", 5)
   155  	verifyProfileCount(t, outDir, "profiles_1h_sum.parquet", 5)
   156  }
   157  
   158  func TestDownsampler_VaryingPartition(t *testing.T) {
   159  	profiles := make([]schemav1.InMemoryProfile, 0)
   160  	builder := testhelper.NewProfileBuilder(1703853310000000000).CPUProfile()
   161  	builder.ForStacktraceString("a", "b", "c").AddSamples(30)
   162  	builder.ForStacktraceString("a", "b", "c", "d").AddSamples(20)
   163  	batch, _ := schemav1testhelper.NewProfileSchema(builder, "cpu")
   164  	profiles = append(profiles, batch...)
   165  
   166  	builder = testhelper.NewProfileBuilder(1703853311000000000).CPUProfile()
   167  	builder.ForStacktraceString("a", "b", "c").AddSamples(30)
   168  	builder.ForStacktraceString("a", "b", "c", "d").AddSamples(20)
   169  	batch, _ = schemav1testhelper.NewProfileSchema(builder, "cpu")
   170  	profiles = append(profiles, batch...)
   171  
   172  	reader := schemav1.NewInMemoryProfilesRowReader(profiles)
   173  	rows, err := phlareparquet.ReadAllWithBufferSize(reader, 5)
   174  	require.NoError(t, err)
   175  
   176  	outDir := t.TempDir()
   177  	d, err := NewDownsampler(outDir, log.NewNopLogger())
   178  	require.NoError(t, err)
   179  
   180  	for i, row := range rows {
   181  		r := schemav1.ProfileRow(row)
   182  		r.SetStacktracePartitionID(uint64(i))
   183  		err = d.AddRow(r, 1)
   184  		require.NoError(t, err)
   185  	}
   186  
   187  	err = d.Close()
   188  	require.NoError(t, err)
   189  
   190  	downsampledRows := readDownsampledRows(t, filepath.Join(outDir, "profiles_5m_sum.parquet"), 2)
   191  	schemav1.DownsampledProfileRow(downsampledRows[0]).ForValues(func(values []parquet.Value) {
   192  		assert.Equal(t, 2, len(values))
   193  		assert.Equal(t, int64(30), values[0].Int64()) // a, b, c
   194  		assert.Equal(t, int64(20), values[1].Int64()) // a, b, c, d
   195  	})
   196  
   197  	downsampledRows = readDownsampledRows(t, filepath.Join(outDir, "profiles_1h_sum.parquet"), 2)
   198  	schemav1.DownsampledProfileRow(downsampledRows[0]).ForValues(func(values []parquet.Value) {
   199  		assert.Equal(t, 2, len(values))
   200  		assert.Equal(t, int64(30), values[0].Int64()) // a, b, c
   201  		assert.Equal(t, int64(20), values[1].Int64()) // a, b, c, d
   202  	})
   203  }
   204  
   205  func BenchmarkDownsampler_AddRow(b *testing.B) {
   206  	f, err := os.Open("../testdata/01HHYG6245NWHZWVP27V8WJRT7/profiles.parquet")
   207  	require.NoError(b, err)
   208  	defer func() {
   209  		require.NoError(b, f.Close())
   210  	}()
   211  
   212  	reader := parquet.NewGenericReader[*schemav1.Profile](f, schemav1.ProfilesSchema)
   213  	rows, err := phlareparquet.ReadAllWithBufferSize(reader, 1024)
   214  	require.NoError(b, err)
   215  
   216  	b.ResetTimer()
   217  	b.ReportAllocs()
   218  	for i := 0; i < b.N; i++ {
   219  		outDir := b.TempDir()
   220  		d, err := NewDownsampler(outDir, log.NewNopLogger())
   221  
   222  		require.NoError(b, err)
   223  		for _, row := range rows {
   224  			err = d.AddRow(schemav1.ProfileRow(row), 1)
   225  			require.NoError(b, err)
   226  		}
   227  
   228  		err = d.Close()
   229  		require.NoError(b, err)
   230  	}
   231  }
   232  
   233  func verifyProfileCount(t *testing.T, dir string, file string, expectedRows int) {
   234  	stat, err := os.Stat(filepath.Join(dir, file))
   235  	require.NoError(t, err)
   236  	require.True(t, stat.Size() > 0)
   237  
   238  	outFile, err := os.Open(filepath.Join(dir, file))
   239  	require.NoError(t, err)
   240  	defer func() {
   241  		require.NoError(t, outFile.Close())
   242  	}()
   243  
   244  	pf, err := parquet.OpenFile(outFile, stat.Size())
   245  	require.NoError(t, err)
   246  
   247  	outReader := parquet.NewReader(pf, schemav1.DownsampledProfilesSchema)
   248  	require.Equal(t, int64(expectedRows), outReader.NumRows())
   249  }
   250  
   251  func readDownsampledRows(t *testing.T, path string, expectedRowCount int) []parquet.Row {
   252  	stat, err := os.Stat(path)
   253  	require.NoError(t, err)
   254  	require.True(t, stat.Size() > 0)
   255  
   256  	outFile, err := os.Open(path)
   257  	require.NoError(t, err)
   258  	defer func() {
   259  		require.NoError(t, outFile.Close())
   260  	}()
   261  
   262  	pf, err := parquet.OpenFile(outFile, stat.Size())
   263  	require.NoError(t, err)
   264  
   265  	reader := parquet.NewReader(pf, schemav1.DownsampledProfilesSchema)
   266  	downsampledRows := make([]parquet.Row, reader.NumRows())
   267  	rowCount, err := reader.ReadRows(downsampledRows)
   268  	require.NoError(t, err)
   269  
   270  	require.Equal(t, expectedRowCount, rowCount)
   271  	return downsampledRows
   272  }