github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/schemas/v1/profiles_test.go (about)

     1  package v1
     2  
     3  import (
     4  	"bytes"
     5  	"fmt"
     6  	"io"
     7  	"math/rand"
     8  	"sort"
     9  	"testing"
    10  
    11  	"github.com/google/uuid"
    12  	"github.com/parquet-go/parquet-go"
    13  	"github.com/stretchr/testify/assert"
    14  	"github.com/stretchr/testify/require"
    15  
    16  	phlareparquet "github.com/grafana/pyroscope/pkg/parquet"
    17  )
    18  
    19  func TestInMemoryProfilesRowReader(t *testing.T) {
    20  	r := NewProfilesRowReader(
    21  		generateProfiles(10),
    22  	)
    23  
    24  	batch := make([]parquet.Row, 3)
    25  	count := 0
    26  	for {
    27  		n, err := r.ReadRows(batch)
    28  		if err != nil && err != io.EOF {
    29  			t.Fatal(err)
    30  		}
    31  		count += n
    32  		if n == 0 || err == io.EOF {
    33  			break
    34  		}
    35  	}
    36  	require.Equal(t, 10, count)
    37  }
    38  
// samplesPerProfile is the number of samples attached to each profile built
// by the generator helpers in this file.
const samplesPerProfile = 100
    40  
    41  func TestRoundtripProfile(t *testing.T) {
    42  	profiles := generateProfiles(1000)
    43  	iprofiles := generateMemoryProfiles(1000)
    44  	actual, err := phlareparquet.ReadAll(NewInMemoryProfilesRowReader(iprofiles))
    45  	require.NoError(t, err)
    46  	expected, err := phlareparquet.ReadAll(NewProfilesRowReader(profiles))
    47  	require.NoError(t, err)
    48  	require.Equal(t, expected, actual)
    49  	_ = expected
    50  	_ = actual
    51  
    52  	t.Run("EmptyOptionalField", func(t *testing.T) {
    53  		profiles := generateProfiles(1)
    54  		for _, p := range profiles {
    55  			p.DurationNanos = 0
    56  			p.Period = 0
    57  			p.DefaultSampleType = 0
    58  			p.KeepFrames = 0
    59  		}
    60  		inMemoryProfiles := generateMemoryProfiles(1)
    61  		for i := range inMemoryProfiles {
    62  			inMemoryProfiles[i].DurationNanos = 0
    63  			inMemoryProfiles[i].Period = 0
    64  			inMemoryProfiles[i].DefaultSampleType = 0
    65  			inMemoryProfiles[i].KeepFrames = 0
    66  		}
    67  		expected, err := phlareparquet.ReadAll(NewProfilesRowReader(profiles))
    68  		require.NoError(t, err)
    69  		actual, err := phlareparquet.ReadAll(NewInMemoryProfilesRowReader(inMemoryProfiles))
    70  		require.NoError(t, err)
    71  		require.Equal(t, expected, actual)
    72  	})
    73  	t.Run("EmptyComment", func(t *testing.T) {
    74  		profiles := generateProfiles(1)
    75  		for _, p := range profiles {
    76  			p.Comments = nil
    77  		}
    78  		inMemoryProfiles := generateMemoryProfiles(1)
    79  		for i := range inMemoryProfiles {
    80  			inMemoryProfiles[i].Comments = nil
    81  		}
    82  		expected, err := phlareparquet.ReadAll(NewProfilesRowReader(profiles))
    83  		require.NoError(t, err)
    84  		actual, err := phlareparquet.ReadAll(NewInMemoryProfilesRowReader(inMemoryProfiles))
    85  		require.NoError(t, err)
    86  		require.Equal(t, expected, actual)
    87  	})
    88  
    89  	t.Run("EmptySamples", func(t *testing.T) {
    90  		profiles := generateProfiles(1)
    91  		for _, p := range profiles {
    92  			p.Samples = nil
    93  		}
    94  		inMemoryProfiles := generateMemoryProfiles(1)
    95  		for i := range inMemoryProfiles {
    96  			inMemoryProfiles[i].Samples = Samples{}
    97  		}
    98  		expected, err := phlareparquet.ReadAll(NewProfilesRowReader(profiles))
    99  		require.NoError(t, err)
   100  		actual, err := phlareparquet.ReadAll(NewInMemoryProfilesRowReader(inMemoryProfiles))
   101  		require.NoError(t, err)
   102  		require.Equal(t, expected, actual)
   103  	})
   104  	t.Run("SampleSpanID", func(t *testing.T) {
   105  		profiles := generateProfiles(1)
   106  		for _, p := range profiles {
   107  			for _, x := range p.Samples {
   108  				x.SpanID = rand.Uint64()
   109  			}
   110  		}
   111  		inMemoryProfiles := generateMemoryProfiles(1)
   112  		for i := range inMemoryProfiles {
   113  			spans := make([]uint64, len(inMemoryProfiles[i].Samples.Values))
   114  			for j := range spans {
   115  				spans[j] = profiles[i].Samples[j].SpanID
   116  			}
   117  			inMemoryProfiles[i].Samples.Spans = spans
   118  		}
   119  		expected, err := phlareparquet.ReadAll(NewProfilesRowReader(profiles))
   120  		require.NoError(t, err)
   121  		actual, err := phlareparquet.ReadAll(NewInMemoryProfilesRowReader(inMemoryProfiles))
   122  		require.NoError(t, err)
   123  		require.Equal(t, expected, actual)
   124  	})
   125  }
   126  
   127  func TestCompactSamples(t *testing.T) {
   128  	require.Equal(t, Samples{
   129  		StacktraceIDs: []uint32{1, 2, 3, 2, 5, 1, 7, 7, 1},
   130  		Values:        []uint64{1, 1, 1, 1, 1, 3, 1, 0, 1},
   131  	}.Compact(true), Samples{
   132  		StacktraceIDs: []uint32{1, 2, 3, 5, 7},
   133  		Values:        []uint64{5, 2, 1, 1, 1},
   134  	})
   135  
   136  	require.Equal(t, Samples{
   137  		StacktraceIDs: []uint32{1, 2, 3, 4, 5, 6, 7, 8, 9},
   138  		Values:        []uint64{1, 0, 1, 1, 1, 0, 1, 1, 0},
   139  	}.Compact(false), Samples{
   140  		StacktraceIDs: []uint32{1, 3, 4, 5, 7, 8},
   141  		Values:        []uint64{1, 1, 1, 1, 1, 1},
   142  	})
   143  
   144  	require.Equal(t, Samples{
   145  		StacktraceIDs: []uint32{1, 2, 3},
   146  		Values:        []uint64{1, 2, 3},
   147  	}.Compact(false), Samples{
   148  		StacktraceIDs: []uint32{1, 2, 3},
   149  		Values:        []uint64{1, 2, 3},
   150  	})
   151  }
   152  
   153  func BenchmarkRowReader(b *testing.B) {
   154  	profiles := generateProfiles(1000)
   155  	iprofiles := generateMemoryProfiles(1000)
   156  	b.Run("in-memory", func(b *testing.B) {
   157  		b.ResetTimer()
   158  		for i := 0; i < b.N; i++ {
   159  			_, err := phlareparquet.ReadAll(NewInMemoryProfilesRowReader(iprofiles))
   160  			if err != nil {
   161  				b.Fatal(err)
   162  			}
   163  		}
   164  	})
   165  	b.Run("schema", func(b *testing.B) {
   166  		b.ResetTimer()
   167  		for i := 0; i < b.N; i++ {
   168  			_, err := phlareparquet.ReadAll(NewProfilesRowReader(profiles))
   169  			if err != nil {
   170  				b.Fatal(err)
   171  			}
   172  		}
   173  	})
   174  }
   175  
   176  func TestMergeProfiles(t *testing.T) {
   177  	reader := NewMergeProfilesRowReader([]parquet.RowReader{
   178  		NewInMemoryProfilesRowReader([]InMemoryProfile{
   179  			{SeriesIndex: 1, TimeNanos: 1},
   180  			{SeriesIndex: 2, TimeNanos: 2},
   181  			{SeriesIndex: 3, TimeNanos: 3},
   182  		}),
   183  		NewInMemoryProfilesRowReader([]InMemoryProfile{
   184  			{SeriesIndex: 1, TimeNanos: 4},
   185  			{SeriesIndex: 2, TimeNanos: 5},
   186  			{SeriesIndex: 3, TimeNanos: 6},
   187  		}),
   188  		NewInMemoryProfilesRowReader([]InMemoryProfile{
   189  			{SeriesIndex: 1, TimeNanos: 7},
   190  			{SeriesIndex: 2, TimeNanos: 8},
   191  			{SeriesIndex: 3, TimeNanos: 9},
   192  		}),
   193  	})
   194  
   195  	actual, err := phlareparquet.ReadAll(reader)
   196  	require.NoError(t, err)
   197  	compareProfileRows(t, generateProfileRow([]InMemoryProfile{
   198  		{SeriesIndex: 1, TimeNanos: 1},
   199  		{SeriesIndex: 1, TimeNanos: 4},
   200  		{SeriesIndex: 1, TimeNanos: 7},
   201  		{SeriesIndex: 2, TimeNanos: 2},
   202  		{SeriesIndex: 2, TimeNanos: 5},
   203  		{SeriesIndex: 2, TimeNanos: 8},
   204  		{SeriesIndex: 3, TimeNanos: 3},
   205  		{SeriesIndex: 3, TimeNanos: 6},
   206  		{SeriesIndex: 3, TimeNanos: 9},
   207  	}), actual)
   208  }
   209  
   210  func TestLessProfileRows(t *testing.T) {
   211  	for _, tc := range []struct {
   212  		a, b     parquet.Row
   213  		expected bool
   214  	}{
   215  		{
   216  			a:        generateProfileRow([]InMemoryProfile{{SeriesIndex: 1, TimeNanos: 1}})[0],
   217  			b:        generateProfileRow([]InMemoryProfile{{SeriesIndex: 1, TimeNanos: 1}})[0],
   218  			expected: false,
   219  		},
   220  		{
   221  			a:        generateProfileRow([]InMemoryProfile{{SeriesIndex: 1, TimeNanos: 1}})[0],
   222  			b:        generateProfileRow([]InMemoryProfile{{SeriesIndex: 1, TimeNanos: 2}})[0],
   223  			expected: true,
   224  		},
   225  		{
   226  			a:        generateProfileRow([]InMemoryProfile{{SeriesIndex: 1, TimeNanos: 1}})[0],
   227  			b:        generateProfileRow([]InMemoryProfile{{SeriesIndex: 2, TimeNanos: 1}})[0],
   228  			expected: true,
   229  		},
   230  	} {
   231  		t.Run("", func(t *testing.T) {
   232  			require.Equal(t, tc.expected, lessProfileRows(tc.a, tc.b))
   233  		})
   234  	}
   235  }
   236  
   237  func TestProfileRowStacktraceIDs(t *testing.T) {
   238  	for _, tc := range []struct {
   239  		name     string
   240  		expected []uint32
   241  		profile  InMemoryProfile
   242  	}{
   243  		{"empty", nil, InMemoryProfile{}},
   244  		{"one sample", []uint32{1}, InMemoryProfile{
   245  			SeriesIndex:         1,
   246  			StacktracePartition: 2,
   247  			TotalValue:          3,
   248  			Samples:             Samples{StacktraceIDs: []uint32{1}, Values: []uint64{1}},
   249  		}},
   250  		{"many", []uint32{1, 1, 2, 3, 4}, InMemoryProfile{
   251  			SeriesIndex:         1,
   252  			StacktracePartition: 2,
   253  			TotalValue:          3,
   254  			Samples: Samples{
   255  				StacktraceIDs: []uint32{1, 1, 2, 3, 4},
   256  				Values:        []uint64{4, 2, 4, 5, 2},
   257  			},
   258  		}},
   259  	} {
   260  		tc := tc
   261  		t.Run(tc.name, func(t *testing.T) {
   262  			rows := generateProfileRow([]InMemoryProfile{tc.profile})
   263  			var ids []uint32
   264  			ProfileRow(rows[0]).ForStacktraceIDsValues(func(values []parquet.Value) {
   265  				for _, v := range values {
   266  					ids = append(ids, v.Uint32())
   267  				}
   268  			})
   269  			require.Equal(t, tc.expected, ids)
   270  		})
   271  	}
   272  }
   273  
   274  func TestProfileRowMutateValues(t *testing.T) {
   275  	row := ProfileRow(generateProfileRow([]InMemoryProfile{
   276  		{
   277  			Samples: Samples{
   278  				StacktraceIDs: []uint32{1, 1, 2, 3, 4},
   279  				Values:        []uint64{4, 2, 4, 5, 2},
   280  			},
   281  		},
   282  	})[0])
   283  	row.ForStacktraceIDsValues(func(values []parquet.Value) {
   284  		for i := range values {
   285  			values[i] = parquet.Int32Value(1).Level(0, 1, values[i].Column())
   286  		}
   287  	})
   288  	var ids []uint32
   289  	row.ForStacktraceIDsValues(func(values []parquet.Value) {
   290  		for _, v := range values {
   291  			ids = append(ids, v.Uint32())
   292  		}
   293  	})
   294  	require.Equal(t, []uint32{1, 1, 1, 1, 1}, ids)
   295  }
   296  
   297  func BenchmarkProfileRows(b *testing.B) {
   298  	a := generateProfileRow([]InMemoryProfile{{SeriesIndex: 1, TimeNanos: 1}})[0]
   299  	a1 := generateProfileRow([]InMemoryProfile{{SeriesIndex: 1, TimeNanos: 2}})[0]
   300  	a2 := generateProfileRow([]InMemoryProfile{{SeriesIndex: 2, TimeNanos: 1}})[0]
   301  
   302  	b.ResetTimer()
   303  	b.ReportAllocs()
   304  
   305  	for i := 0; i < b.N; i++ {
   306  		lessProfileRows(a, a)
   307  		lessProfileRows(a, a1)
   308  		lessProfileRows(a, a2)
   309  	}
   310  }
   311  
   312  func Benchmark_SpanID_Encoding(b *testing.B) {
   313  	const profilesN = 1000
   314  
   315  	profiles := func(share float64) []InMemoryProfile {
   316  		randomSpanIDs := make([]uint64, int(samplesPerProfile*share))
   317  		inMemoryProfiles := generateMemoryProfiles(profilesN)
   318  		for j := range inMemoryProfiles {
   319  			for i := range randomSpanIDs {
   320  				randomSpanIDs[i] = rand.Uint64()
   321  			}
   322  			spans := make([]uint64, len(inMemoryProfiles[j].Samples.Values))
   323  			for o := range spans {
   324  				spans[o] = randomSpanIDs[o%len(randomSpanIDs)]
   325  			}
   326  			inMemoryProfiles[j].Samples.Spans = spans
   327  			// We only need this for RLE.
   328  			sort.Sort(SamplesBySpanID(inMemoryProfiles[j].Samples))
   329  		}
   330  		return inMemoryProfiles
   331  	}
   332  
   333  	for _, share := range []float64{
   334  		1,
   335  		0.5,
   336  		0.25,
   337  		0.15,
   338  		0.05,
   339  	} {
   340  		share := share
   341  		b.Run(fmt.Sprintf("%v (%d/%d)", share, int(samplesPerProfile*share), samplesPerProfile), func(b *testing.B) {
   342  			inMemoryProfiles := profiles(share)
   343  			var buf bytes.Buffer
   344  			w := parquet.NewGenericWriter[*Profile](&buf, ProfilesSchema)
   345  
   346  			n, err := parquet.CopyRows(w, NewInMemoryProfilesRowReader(inMemoryProfiles))
   347  			require.NoError(b, err)
   348  			require.Equal(b, len(inMemoryProfiles), int(n))
   349  			require.NoError(b, w.Close())
   350  
   351  			b.ResetTimer()
   352  			b.ReportAllocs()
   353  
   354  			for i := 0; i < b.N; i++ {
   355  				b.ReportMetric(float64(buf.Len()), "bytes")
   356  				r := parquet.NewReader(bytes.NewReader(buf.Bytes()), ProfilesSchema)
   357  				n, err = parquet.CopyRows(parquet.MultiRowWriter(), r)
   358  				require.NoError(b, err)
   359  				require.Equal(b, len(inMemoryProfiles), int(n))
   360  			}
   361  		})
   362  	}
   363  }
   364  
   365  func compareProfileRows(t *testing.T, expected, actual []parquet.Row) {
   366  	t.Helper()
   367  	require.Equal(t, len(expected), len(actual))
   368  	for i := range expected {
   369  		expectedProfile, actualProfile := &Profile{}, &Profile{}
   370  		require.NoError(t, ProfilesSchema.Reconstruct(actualProfile, actual[i]))
   371  		require.NoError(t, ProfilesSchema.Reconstruct(expectedProfile, expected[i]))
   372  		require.Equal(t, expectedProfile, actualProfile, "row %d", i)
   373  	}
   374  }
   375  
   376  func generateProfileRow(in []InMemoryProfile) []parquet.Row {
   377  	rows := make([]parquet.Row, len(in))
   378  	for i, p := range in {
   379  		rows[i] = deconstructMemoryProfile(p, rows[i])
   380  	}
   381  	return rows
   382  }
   383  
   384  func generateMemoryProfiles(n int) []InMemoryProfile {
   385  	profiles := make([]InMemoryProfile, n)
   386  	for i := 0; i < n; i++ {
   387  		stacktraceID := make([]uint32, samplesPerProfile)
   388  		value := make([]uint64, samplesPerProfile)
   389  		for j := 0; j < samplesPerProfile; j++ {
   390  			stacktraceID[j] = uint32(j)
   391  			value[j] = uint64(j)
   392  		}
   393  		profiles[i] = InMemoryProfile{
   394  			ID:                uuid.MustParse(fmt.Sprintf("00000000-0000-0000-0000-%012d", i)),
   395  			SeriesIndex:       uint32(i),
   396  			DropFrames:        1,
   397  			KeepFrames:        3,
   398  			TimeNanos:         int64(i),
   399  			TotalValue:        100,
   400  			Period:            100000,
   401  			DurationNanos:     1000000000,
   402  			Comments:          []int64{1, 2, 3},
   403  			DefaultSampleType: 2,
   404  			Samples: Samples{
   405  				StacktraceIDs: stacktraceID,
   406  				Values:        value,
   407  			},
   408  		}
   409  	}
   410  	return profiles
   411  }
   412  
   413  func generateProfiles(n int) []*Profile {
   414  	profiles := make([]*Profile, n)
   415  	for i := 0; i < n; i++ {
   416  		profiles[i] = &Profile{
   417  			ID:                uuid.MustParse(fmt.Sprintf("00000000-0000-0000-0000-%012d", i)),
   418  			SeriesIndex:       uint32(i),
   419  			DropFrames:        1,
   420  			KeepFrames:        3,
   421  			TotalValue:        100,
   422  			TimeNanos:         int64(i),
   423  			Period:            100000,
   424  			DurationNanos:     1000000000,
   425  			Comments:          []int64{1, 2, 3},
   426  			DefaultSampleType: 2,
   427  			Samples:           generateSamples(samplesPerProfile),
   428  		}
   429  	}
   430  
   431  	return profiles
   432  }
   433  
   434  func generateSamples(n int) []*Sample {
   435  	samples := make([]*Sample, n)
   436  	for i := 0; i < n; i++ {
   437  		samples[i] = &Sample{
   438  			StacktraceID: uint64(i),
   439  			Value:        int64(i),
   440  		}
   441  	}
   442  	return samples
   443  }
   444  
   445  func Test_SamplesFromMap(t *testing.T) {
   446  	m := map[uint32]uint64{
   447  		1: 2,
   448  		0: 0,
   449  		2: 3,
   450  		3: 0,
   451  	}
   452  	samples := NewSamplesFromMap(m)
   453  	assert.Equal(t, len(m), cap(samples.Values))
   454  	assert.Equal(t, 2, len(samples.Values))
   455  }
   456  
   457  func Test_SamplesRange(t *testing.T) {
   458  	tests := []struct {
   459  		name     string
   460  		input    Samples
   461  		n, m     int
   462  		expected Samples
   463  	}{
   464  		{
   465  			name: "empty spans",
   466  			input: Samples{
   467  				StacktraceIDs: []uint32{1, 2, 3, 4, 5},
   468  				Values:        []uint64{10, 20, 30, 40, 50},
   469  			},
   470  			n: 1,
   471  			m: 3,
   472  			expected: Samples{
   473  				StacktraceIDs: []uint32{2, 3},
   474  				Values:        []uint64{20, 30},
   475  			},
   476  		},
   477  		{
   478  			name: "non-empty Spans",
   479  			input: Samples{
   480  				StacktraceIDs: []uint32{1, 2, 3, 4, 5},
   481  				Values:        []uint64{10, 20, 30, 40, 50},
   482  				Spans:         []uint64{100, 200, 300, 400, 500},
   483  			},
   484  			n: 1,
   485  			m: 4,
   486  			expected: Samples{
   487  				StacktraceIDs: []uint32{2, 3, 4},
   488  				Values:        []uint64{20, 30, 40},
   489  				Spans:         []uint64{200, 300, 400},
   490  			},
   491  		},
   492  		{
   493  			name: "all",
   494  			input: Samples{
   495  				StacktraceIDs: []uint32{1, 2, 3},
   496  				Values:        []uint64{10, 20, 30},
   497  				Spans:         []uint64{100, 200, 300},
   498  			},
   499  			n: 0,
   500  			m: 3,
   501  			expected: Samples{
   502  				StacktraceIDs: []uint32{1, 2, 3},
   503  				Values:        []uint64{10, 20, 30},
   504  				Spans:         []uint64{100, 200, 300},
   505  			},
   506  		},
   507  		{
   508  			name: "oob: n < 0",
   509  			input: Samples{
   510  				StacktraceIDs: []uint32{1, 2, 3},
   511  				Values:        []uint64{10, 20, 30},
   512  			},
   513  			n: -1,
   514  			m: 3,
   515  		},
   516  		{
   517  			name: "oob: m > n",
   518  			input: Samples{
   519  				StacktraceIDs: []uint32{1, 2, 3},
   520  				Values:        []uint64{10, 20, 30},
   521  			},
   522  			n: 3,
   523  			m: 1,
   524  		},
   525  		{
   526  			name: "oob: m > len",
   527  			input: Samples{
   528  				StacktraceIDs: []uint32{1, 2, 3},
   529  				Values:        []uint64{10, 20, 30},
   530  			},
   531  			n: 3,
   532  			m: 5,
   533  		},
   534  	}
   535  
   536  	for _, tt := range tests {
   537  		t.Run(tt.name, func(t *testing.T) {
   538  			result := tt.input.Range(tt.n, tt.m)
   539  			assert.Equal(t, tt.expected.StacktraceIDs, result.StacktraceIDs)
   540  			assert.Equal(t, tt.expected.Values, result.Values)
   541  			assert.Equal(t, tt.expected.Spans, result.Spans)
   542  		})
   543  	}
   544  }
   545  
   546  func TestColumnCount(t *testing.T) {
   547  	profiles := []InMemoryProfile{{
   548  		SeriesIndex: 1,
   549  		TimeNanos:   2,
   550  		Samples: Samples{
   551  			StacktraceIDs: []uint32{1, 2, 3},
   552  			Values:        []uint64{1, 2, 3},
   553  		},
   554  	},
   555  		{
   556  			SeriesIndex: 1,
   557  			TimeNanos:   2,
   558  			Samples: Samples{
   559  				StacktraceIDs: []uint32{1, 2, 3},
   560  				Values:        []uint64{1, 2, 3},
   561  				Spans:         []uint64{1, 2, 3},
   562  			},
   563  		},
   564  		{
   565  			SeriesIndex: 1,
   566  			TimeNanos:   2,
   567  			Samples: Samples{
   568  				StacktraceIDs: []uint32{1, 2, 3},
   569  				Values:        []uint64{1, 2, 3},
   570  				Spans:         []uint64{1, 2, 3},
   571  			},
   572  			Comments: []int64{1, 2, 3},
   573  		}}
   574  	for _, profile := range profiles {
   575  		count := profileColumnCount(profile)
   576  
   577  		row := deconstructMemoryProfile(profile, nil)
   578  		assert.Equal(t, len(row), count)
   579  		assert.Equal(t, cap(row), count)
   580  	}
   581  
   582  }