github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/downsample/downsample_test.go (about) 1 package downsample 2 3 import ( 4 "os" 5 "path/filepath" 6 "testing" 7 8 "github.com/go-kit/log" 9 "github.com/parquet-go/parquet-go" 10 "github.com/prometheus/common/model" 11 "github.com/stretchr/testify/assert" 12 "github.com/stretchr/testify/require" 13 14 phlareparquet "github.com/grafana/pyroscope/pkg/parquet" 15 schemav1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" 16 schemav1testhelper "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1/testhelper" 17 "github.com/grafana/pyroscope/pkg/pprof/testhelper" 18 ) 19 20 func TestDownsampler_ProfileCounts(t *testing.T) { 21 outDir := t.TempDir() 22 d, err := NewDownsampler(outDir, log.NewNopLogger()) 23 require.NoError(t, err) 24 25 f, err := os.Open("../testdata/01HHYG6245NWHZWVP27V8WJRT7/profiles.parquet") 26 require.NoError(t, err) 27 defer func() { 28 require.NoError(t, f.Close()) 29 }() 30 31 reader := parquet.NewReader(f, schemav1.ProfilesSchema) 32 rows, err := phlareparquet.ReadAllWithBufferSize(reader, 1024) 33 require.NoError(t, err) 34 35 for _, row := range rows { 36 err = d.AddRow(schemav1.ProfileRow(row), 1) 37 require.NoError(t, err) 38 } 39 40 err = d.Close() 41 require.NoError(t, err) 42 43 verifyProfileCount(t, outDir, "profiles_5m_sum.parquet", 1869) 44 verifyProfileCount(t, outDir, "profiles_1h_sum.parquet", 50) 45 } 46 47 func TestDownsampler_Aggregation(t *testing.T) { 48 profiles := make([]schemav1.InMemoryProfile, 0) 49 builder := testhelper.NewProfileBuilder(1703853310000000000).CPUProfile() // 2023-12-29T12:35:10Z 50 builder.ForStacktraceString("a", "b", "c").AddSamples(30) 51 builder.ForStacktraceString("a", "b", "c", "d").AddSamples(20) 52 builder.WithAnnotations("test annotation 1") 53 batch, _ := schemav1testhelper.NewProfileSchema(builder, "cpu") 54 profiles = append(profiles, batch...) 55 56 builder = testhelper.NewProfileBuilder(1703853559000000000).CPUProfile() // 2023-12-29T12:39:19Z 57 builder.ForStacktraceString("a", "b", "c").AddSamples(40) 58 builder.ForStacktraceString("a", "b", "c", "d").AddSamples(30) 59 builder.ForStacktraceString("a", "b", "c", "d", "e").AddSamples(20) 60 batch, _ = schemav1testhelper.NewProfileSchema(builder, "cpu") 61 profiles = append(profiles, batch...) 62 63 builder = testhelper.NewProfileBuilder(1703854209000000000).CPUProfile() // 2023-12-29T12:50:09Z 64 builder.ForStacktraceString("a", "b", "c").AddSamples(40) 65 builder.ForStacktraceString("a", "b", "c", "d").AddSamples(30) 66 batch, _ = schemav1testhelper.NewProfileSchema(builder, "cpu") 67 profiles = append(profiles, batch...) 68 69 builder = testhelper.NewProfileBuilder(1703858409000000000).CPUProfile() // 2023-12-29T14:00:09Z 70 builder.ForStacktraceString("a", "b", "c").AddSamples(30) 71 builder.ForStacktraceString("a", "b", "c", "d").AddSamples(20) 72 builder.WithAnnotations("test annotation 2") 73 batch, _ = schemav1testhelper.NewProfileSchema(builder, "cpu") 74 profiles = append(profiles, batch...) 75 76 reader := schemav1.NewInMemoryProfilesRowReader(profiles) 77 rows, err := phlareparquet.ReadAllWithBufferSize(reader, 1024) 78 require.NoError(t, err) 79 80 outDir := t.TempDir() 81 d, err := NewDownsampler(outDir, log.NewNopLogger()) 82 require.NoError(t, err) 83 84 for _, row := range rows { 85 err = d.AddRow(schemav1.ProfileRow(row), 1) 86 require.NoError(t, err) 87 } 88 89 err = d.Close() 90 require.NoError(t, err) 91 92 downsampledRows := readDownsampledRows(t, filepath.Join(outDir, "profiles_5m_sum.parquet"), 3) 93 94 schemav1.DownsampledProfileRow(downsampledRows[0]).ForValues(func(values []parquet.Value) { 95 require.Equal(t, 3, len(values)) 96 require.Equal(t, int64(70), values[0].Int64()) // a, b, c 97 require.Equal(t, int64(50), values[1].Int64()) // a, b, c, d 98 require.Equal(t, int64(20), values[2].Int64()) // a, b, c, d, e 99 }) 100 101 annotations := make([]string, 0) 102 schemav1.DownsampledProfileRow(downsampledRows[0]).ForAnnotationValues(func(values []parquet.Value) { 103 annotations = append(annotations, values[0].String()) 104 }) 105 require.Equal(t, 1, len(annotations)) 106 require.Equal(t, "test annotation 1", annotations[0]) 107 108 downsampledRows = readDownsampledRows(t, filepath.Join(outDir, "profiles_1h_sum.parquet"), 2) 109 110 schemav1.DownsampledProfileRow(downsampledRows[0]).ForValues(func(values []parquet.Value) { 111 require.Equal(t, 3, len(values)) 112 require.Equal(t, int64(110), values[0].Int64()) // a, b, c 113 require.Equal(t, int64(80), values[1].Int64()) // a, b, c, d 114 require.Equal(t, int64(20), values[2].Int64()) // a, b, c, d, e 115 }) 116 117 annotations = make([]string, 0) 118 schemav1.DownsampledProfileRow(downsampledRows[0]).ForAnnotationValues(func(values []parquet.Value) { 119 annotations = append(annotations, values[0].String()) 120 }) 121 schemav1.DownsampledProfileRow(downsampledRows[1]).ForAnnotationValues(func(values []parquet.Value) { 122 annotations = append(annotations, values[0].String()) 123 }) 124 require.Equal(t, 2, len(annotations)) 125 require.Equal(t, "test annotation 1", annotations[0]) 126 require.Equal(t, "test annotation 2", annotations[1]) 127 } 128 129 func TestDownsampler_VaryingFingerprints(t *testing.T) { 130 profiles := make([]schemav1.InMemoryProfile, 0) 131 for i := 0; i < 5; i++ { 132 builder := testhelper.NewProfileBuilder(1703853310000000000).CPUProfile() // 2023-12-29T12:35:10Z 133 builder.ForStacktraceString("a", "b", "c").AddSamples(30) 134 batch, _ := schemav1testhelper.NewProfileSchema(builder, "cpu") 135 profiles = append(profiles, batch...) 136 } 137 138 reader := schemav1.NewInMemoryProfilesRowReader(profiles) 139 rows, err := phlareparquet.ReadAllWithBufferSize(reader, 1024) 140 require.NoError(t, err) 141 142 outDir := t.TempDir() 143 d, err := NewDownsampler(outDir, log.NewNopLogger()) 144 require.NoError(t, err) 145 146 for i, row := range rows { 147 err = d.AddRow(schemav1.ProfileRow(row), model.Fingerprint(i)) 148 require.NoError(t, err) 149 } 150 151 err = d.Close() 152 require.NoError(t, err) 153 154 verifyProfileCount(t, outDir, "profiles_5m_sum.parquet", 5) 155 verifyProfileCount(t, outDir, "profiles_1h_sum.parquet", 5) 156 } 157 158 func TestDownsampler_VaryingPartition(t *testing.T) { 159 profiles := make([]schemav1.InMemoryProfile, 0) 160 builder := testhelper.NewProfileBuilder(1703853310000000000).CPUProfile() 161 builder.ForStacktraceString("a", "b", "c").AddSamples(30) 162 builder.ForStacktraceString("a", "b", "c", "d").AddSamples(20) 163 batch, _ := schemav1testhelper.NewProfileSchema(builder, "cpu") 164 profiles = append(profiles, batch...) 165 166 builder = testhelper.NewProfileBuilder(1703853311000000000).CPUProfile() 167 builder.ForStacktraceString("a", "b", "c").AddSamples(30) 168 builder.ForStacktraceString("a", "b", "c", "d").AddSamples(20) 169 batch, _ = schemav1testhelper.NewProfileSchema(builder, "cpu") 170 profiles = append(profiles, batch...) 171 172 reader := schemav1.NewInMemoryProfilesRowReader(profiles) 173 rows, err := phlareparquet.ReadAllWithBufferSize(reader, 5) 174 require.NoError(t, err) 175 176 outDir := t.TempDir() 177 d, err := NewDownsampler(outDir, log.NewNopLogger()) 178 require.NoError(t, err) 179 180 for i, row := range rows { 181 r := schemav1.ProfileRow(row) 182 r.SetStacktracePartitionID(uint64(i)) 183 err = d.AddRow(r, 1) 184 require.NoError(t, err) 185 } 186 187 err = d.Close() 188 require.NoError(t, err) 189 190 downsampledRows := readDownsampledRows(t, filepath.Join(outDir, "profiles_5m_sum.parquet"), 2) 191 schemav1.DownsampledProfileRow(downsampledRows[0]).ForValues(func(values []parquet.Value) { 192 assert.Equal(t, 2, len(values)) 193 assert.Equal(t, int64(30), values[0].Int64()) // a, b, c 194 assert.Equal(t, int64(20), values[1].Int64()) // a, b, c, d 195 }) 196 197 downsampledRows = readDownsampledRows(t, filepath.Join(outDir, "profiles_1h_sum.parquet"), 2) 198 schemav1.DownsampledProfileRow(downsampledRows[0]).ForValues(func(values []parquet.Value) { 199 assert.Equal(t, 2, len(values)) 200 assert.Equal(t, int64(30), values[0].Int64()) // a, b, c 201 assert.Equal(t, int64(20), values[1].Int64()) // a, b, c, d 202 }) 203 } 204 205 func BenchmarkDownsampler_AddRow(b *testing.B) { 206 f, err := os.Open("../testdata/01HHYG6245NWHZWVP27V8WJRT7/profiles.parquet") 207 require.NoError(b, err) 208 defer func() { 209 require.NoError(b, f.Close()) 210 }() 211 212 reader := parquet.NewGenericReader[*schemav1.Profile](f, schemav1.ProfilesSchema) 213 rows, err := phlareparquet.ReadAllWithBufferSize(reader, 1024) 214 require.NoError(b, err) 215 216 b.ResetTimer() 217 b.ReportAllocs() 218 for i := 0; i < b.N; i++ { 219 outDir := b.TempDir() 220 d, err := NewDownsampler(outDir, log.NewNopLogger()) 221 222 require.NoError(b, err) 223 for _, row := range rows { 224 err = d.AddRow(schemav1.ProfileRow(row), 1) 225 require.NoError(b, err) 226 } 227 228 err = d.Close() 229 require.NoError(b, err) 230 } 231 } 232 233 func verifyProfileCount(t *testing.T, dir string, file string, expectedRows int) { 234 stat, err := os.Stat(filepath.Join(dir, file)) 235 require.NoError(t, err) 236 require.True(t, stat.Size() > 0) 237 238 outFile, err := os.Open(filepath.Join(dir, file)) 239 require.NoError(t, err) 240 defer func() { 241 require.NoError(t, outFile.Close()) 242 }() 243 244 pf, err := parquet.OpenFile(outFile, stat.Size()) 245 require.NoError(t, err) 246 247 outReader := parquet.NewReader(pf, schemav1.DownsampledProfilesSchema) 248 require.Equal(t, int64(expectedRows), outReader.NumRows()) 249 } 250 251 func readDownsampledRows(t *testing.T, path string, expectedRowCount int) []parquet.Row { 252 stat, err := os.Stat(path) 253 require.NoError(t, err) 254 require.True(t, stat.Size() > 0) 255 256 outFile, err := os.Open(path) 257 require.NoError(t, err) 258 defer func() { 259 require.NoError(t, outFile.Close()) 260 }() 261 262 pf, err := parquet.OpenFile(outFile, stat.Size()) 263 require.NoError(t, err) 264 265 reader := parquet.NewReader(pf, schemav1.DownsampledProfilesSchema) 266 downsampledRows := make([]parquet.Row, reader.NumRows()) 267 rowCount, err := reader.ReadRows(downsampledRows) 268 require.NoError(t, err) 269 270 require.Equal(t, expectedRowCount, rowCount) 271 return downsampledRows 272 }