github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/schemas/v1/schema_test.go (about)

     1  package v1
     2  
     3  import (
     4  	"bytes"
     5  	"io"
     6  	"strings"
     7  	"testing"
     8  
     9  	"github.com/google/uuid"
    10  	"github.com/parquet-go/parquet-go"
    11  	"github.com/stretchr/testify/assert"
    12  	"github.com/stretchr/testify/require"
    13  
    14  	profilev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1"
    15  )
    16  
    17  // This test ensures that the structs that are stored and the used schema matches
    18  func TestSchemaMatch(t *testing.T) {
    19  	// TODO: Unfortunately the upstream schema doesn't correctly produce a
    20  	// schema of a List of a struct pointer. This replaces this in the schema
    21  	// comparison, because this has no affect to our construct/reconstruct code
    22  	// we can simply replace the string in the schema.
    23  	profilesStructSchema := strings.ReplaceAll(
    24  		parquet.SchemaOf(&Profile{}).String(),
    25  		"optional group element",
    26  		"required group element",
    27  	)
    28  
    29  	require.Equal(t, profilesStructSchema, ProfilesSchema.String())
    30  
    31  	stacktracesStructSchema := parquet.SchemaOf(&storedStacktrace{})
    32  	require.Equal(t, strings.Replace(stacktracesStructSchema.String(), "message storedStacktrace", "message Stacktrace", 1), stacktracesSchema.String())
    33  }
    34  
    35  func newStacktraces() []*Stacktrace {
    36  	return []*Stacktrace{
    37  		{LocationIDs: []uint64{0x11}},
    38  		{LocationIDs: []uint64{}},
    39  		{LocationIDs: []uint64{12, 13}},
    40  		{LocationIDs: []uint64{}},
    41  		{LocationIDs: []uint64{14, 15}},
    42  	}
    43  }
    44  
    45  func TestStacktracesRoundTrip(t *testing.T) {
    46  	var (
    47  		s   = newStacktraces()
    48  		w   = &ReadWriter[*Stacktrace, *StacktracePersister]{}
    49  		buf bytes.Buffer
    50  	)
    51  
    52  	require.NoError(t, w.WriteParquetFile(&buf, s))
    53  
    54  	sRead, err := w.ReadParquetFile(bytes.NewReader(buf.Bytes()))
    55  	require.NoError(t, err)
    56  	assert.Equal(t, newStacktraces(), sRead)
    57  }
    58  
    59  func newStrings() []string {
    60  	return []string{
    61  		"",
    62  		"foo",
    63  		"bar",
    64  		"baz",
    65  		"",
    66  	}
    67  }
    68  
    69  func TestStringsRoundTrip(t *testing.T) {
    70  	var (
    71  		s   = newStrings()
    72  		w   = &ReadWriter[string, StringPersister]{}
    73  		buf bytes.Buffer
    74  	)
    75  
    76  	require.NoError(t, w.WriteParquetFile(&buf, s))
    77  
    78  	sRead, err := w.ReadParquetFile(bytes.NewReader(buf.Bytes()))
    79  	require.NoError(t, err)
    80  	assert.Equal(t, newStrings(), sRead)
    81  }
    82  
    83  func newProfiles() []*Profile {
    84  	return []*Profile{
    85  		{
    86  			ID:          uuid.MustParse("00000000-0000-0000-0000-000000000001"),
    87  			TimeNanos:   1001,
    88  			SeriesIndex: 0xaa,
    89  			Samples: []*Sample{
    90  				{
    91  					StacktraceID: 0xba,
    92  					Value:        0xca,
    93  					Labels:       []*profilev1.Label{},
    94  				},
    95  				{
    96  					StacktraceID: 0xbb,
    97  					Value:        0xca,
    98  					Labels: []*profilev1.Label{
    99  						{Key: 0xda, Str: 0xea},
   100  					},
   101  				},
   102  			},
   103  			Comments:    []int64{},
   104  			Annotations: []*Annotation{},
   105  		},
   106  		{
   107  			ID:          uuid.MustParse("00000000-0000-0000-0000-000000000001"),
   108  			TimeNanos:   1001,
   109  			SeriesIndex: 0xab,
   110  			Samples: []*Sample{
   111  				{
   112  					StacktraceID: 0xba,
   113  					Value:        0xcc,
   114  					Labels:       []*profilev1.Label{},
   115  				},
   116  				{
   117  					StacktraceID: 0xbb,
   118  					Value:        0xcc,
   119  					Labels: []*profilev1.Label{
   120  						{Key: 0xda, Str: 0xea},
   121  					},
   122  				},
   123  			},
   124  			Comments:    []int64{},
   125  			Annotations: []*Annotation{},
   126  		},
   127  		{
   128  			ID:          uuid.MustParse("00000000-0000-0000-0000-000000000002"),
   129  			SeriesIndex: 0xab,
   130  			TimeNanos:   1002,
   131  			Samples: []*Sample{
   132  				{
   133  					StacktraceID: 0xbc,
   134  					Value:        0xcd,
   135  					Labels:       []*profilev1.Label{},
   136  				},
   137  			},
   138  			Comments:    []int64{},
   139  			Annotations: []*Annotation{{Key: "key", Value: "test annotation"}},
   140  		},
   141  		{
   142  			ID:          uuid.MustParse("00000000-0000-0000-0000-000000000002"),
   143  			SeriesIndex: 0xac,
   144  			TimeNanos:   1002,
   145  			Samples: []*Sample{
   146  				{
   147  					StacktraceID: 0xbc,
   148  					Value:        0xce,
   149  					Labels:       []*profilev1.Label{},
   150  				},
   151  			},
   152  			Comments:    []int64{},
   153  			Annotations: []*Annotation{},
   154  		},
   155  	}
   156  }
   157  
   158  func TestProfilesRoundTrip(t *testing.T) {
   159  	var (
   160  		p   = newProfiles()
   161  		w   = &ReadWriter[*Profile, *ProfilePersister]{}
   162  		buf bytes.Buffer
   163  	)
   164  
   165  	require.NoError(t, w.WriteParquetFile(&buf, p))
   166  
   167  	sRead, err := w.ReadParquetFile(bytes.NewReader(buf.Bytes()))
   168  	require.NoError(t, err)
   169  	assert.Equal(t, newProfiles(), sRead)
   170  }
   171  
   172  func TestLocationsRoundTrip(t *testing.T) {
   173  	raw := []*profilev1.Location{
   174  		{
   175  			Id:        8,
   176  			Address:   9,
   177  			MappingId: 10,
   178  			Line: []*profilev1.Line{
   179  				{
   180  					FunctionId: 11,
   181  					Line:       12,
   182  				},
   183  				{
   184  					FunctionId: 13,
   185  					Line:       14,
   186  				},
   187  			},
   188  			IsFolded: true,
   189  		},
   190  		{
   191  			Id:        1,
   192  			Address:   2,
   193  			MappingId: 3,
   194  			Line: []*profilev1.Line{
   195  				{
   196  					FunctionId: 4,
   197  					Line:       5,
   198  				},
   199  				{
   200  					FunctionId: 6,
   201  					Line:       7,
   202  				},
   203  			},
   204  			IsFolded: false,
   205  		},
   206  		{
   207  			Id:        10,
   208  			Address:   11,
   209  			MappingId: 12,
   210  			// both pprofLocationPersister and LocationPersister deserialize as empty slice, not nil
   211  			Line:     nil,
   212  			IsFolded: false,
   213  		},
   214  		{
   215  			Id:        10,
   216  			Address:   11,
   217  			MappingId: 12,
   218  			Line:      make([]*profilev1.Line, 0),
   219  			IsFolded:  false,
   220  		},
   221  	}
   222  
   223  	mem := []InMemoryLocation{
   224  		{
   225  			Id:        8,
   226  			Address:   9,
   227  			MappingId: 10,
   228  			Line: []InMemoryLine{
   229  				{
   230  					FunctionId: 11,
   231  					Line:       12,
   232  				},
   233  				{
   234  					FunctionId: 13,
   235  					Line:       14,
   236  				},
   237  			},
   238  			IsFolded: true,
   239  		},
   240  		{
   241  			Id:        1,
   242  			Address:   2,
   243  			MappingId: 3,
   244  			Line: []InMemoryLine{
   245  				{
   246  					FunctionId: 4,
   247  					Line:       5,
   248  				},
   249  				{
   250  					FunctionId: 6,
   251  					Line:       7,
   252  				},
   253  			},
   254  			IsFolded: false,
   255  		},
   256  		{
   257  			Id:        10,
   258  			Address:   11,
   259  			MappingId: 12,
   260  			// both pprofLocationPersister and LocationPersister deserialize as empty slice, not nil
   261  			Line:     nil,
   262  			IsFolded: false,
   263  		},
   264  		{
   265  			Id:        10,
   266  			Address:   11,
   267  			MappingId: 12,
   268  			Line:      make([]InMemoryLine, 0),
   269  			IsFolded:  false,
   270  		},
   271  	}
   272  
   273  	expectedMem := func() []InMemoryLocation {
   274  		res := make([]InMemoryLocation, len(mem))
   275  		for i, loc := range mem {
   276  			if loc.Line == nil {
   277  				loc.Line = make([]InMemoryLine, 0)
   278  			}
   279  			res[i] = loc
   280  		}
   281  		return res
   282  	}
   283  
   284  	expectedRaw := func() []*profilev1.Location {
   285  		res := make([]*profilev1.Location, len(raw))
   286  		for i, loc := range raw {
   287  			cloned := loc.CloneVT()
   288  			if cloned.Line == nil {
   289  				cloned.Line = make([]*profilev1.Line, 0)
   290  			}
   291  			res[i] = cloned
   292  		}
   293  		return res
   294  	}
   295  
   296  	var buf bytes.Buffer
   297  	require.NoError(t, new(ReadWriter[*profilev1.Location, pprofLocationPersister]).WriteParquetFile(&buf, raw))
   298  	actualMem, err := new(ReadWriter[InMemoryLocation, LocationPersister]).ReadParquetFile(bytes.NewReader(buf.Bytes()))
   299  	require.NoError(t, err)
   300  	assert.Equal(t, expectedMem(), actualMem)
   301  
   302  	buf.Reset()
   303  	require.NoError(t, new(ReadWriter[InMemoryLocation, LocationPersister]).WriteParquetFile(&buf, mem))
   304  	actualMem, err = new(ReadWriter[InMemoryLocation, LocationPersister]).ReadParquetFile(bytes.NewReader(buf.Bytes()))
   305  	require.NoError(t, err)
   306  	assert.Equal(t, expectedMem(), actualMem)
   307  
   308  	buf.Reset()
   309  	require.NoError(t, new(ReadWriter[*profilev1.Location, pprofLocationPersister]).WriteParquetFile(&buf, raw))
   310  	actualRaw, err := new(ReadWriter[*profilev1.Location, pprofLocationPersister]).ReadParquetFile(bytes.NewReader(buf.Bytes()))
   311  	require.NoError(t, err)
   312  	assert.Equal(t, expectedRaw(), actualRaw)
   313  }
   314  
   315  var protoLocationsSchema = parquet.SchemaOf(&profilev1.Location{})
   316  
   317  type pprofLocationPersister struct{}
   318  
   319  func (pprofLocationPersister) Name() string { return "locations" }
   320  
   321  func (pprofLocationPersister) Schema() *parquet.Schema { return protoLocationsSchema }
   322  
   323  func (pprofLocationPersister) Deconstruct(row parquet.Row, loc *profilev1.Location) parquet.Row {
   324  	row = protoLocationsSchema.Deconstruct(row, loc)
   325  	return row
   326  }
   327  
   328  func (pprofLocationPersister) Reconstruct(row parquet.Row) (*profilev1.Location, error) {
   329  	var loc profilev1.Location
   330  	if err := protoLocationsSchema.Reconstruct(&loc, row); err != nil {
   331  		return nil, err
   332  	}
   333  	return &loc, nil
   334  }
   335  
   336  func TestFunctionsRoundTrip(t *testing.T) {
   337  	raw := []*profilev1.Function{
   338  		{
   339  			Id:         6,
   340  			Name:       7,
   341  			SystemName: 8,
   342  			Filename:   9,
   343  			StartLine:  10,
   344  		},
   345  		{
   346  			Id:         1,
   347  			Name:       2,
   348  			SystemName: 3,
   349  			Filename:   4,
   350  			StartLine:  5,
   351  		},
   352  	}
   353  
   354  	mem := []InMemoryFunction{
   355  		{
   356  			Id:         6,
   357  			Name:       7,
   358  			SystemName: 8,
   359  			Filename:   9,
   360  			StartLine:  10,
   361  		},
   362  		{
   363  			Id:         1,
   364  			Name:       2,
   365  			SystemName: 3,
   366  			Filename:   4,
   367  			StartLine:  5,
   368  		},
   369  	}
   370  
   371  	var buf bytes.Buffer
   372  	require.NoError(t, new(ReadWriter[*profilev1.Function, *pprofFunctionPersister]).WriteParquetFile(&buf, raw))
   373  	actual, err := new(ReadWriter[InMemoryFunction, FunctionPersister]).ReadParquetFile(bytes.NewReader(buf.Bytes()))
   374  	require.NoError(t, err)
   375  	assert.Equal(t, mem, actual)
   376  
   377  	buf.Reset()
   378  	require.NoError(t, new(ReadWriter[InMemoryFunction, FunctionPersister]).WriteParquetFile(&buf, mem))
   379  	actual, err = new(ReadWriter[InMemoryFunction, FunctionPersister]).ReadParquetFile(bytes.NewReader(buf.Bytes()))
   380  	require.NoError(t, err)
   381  	assert.Equal(t, mem, actual)
   382  }
   383  
   384  var protoFunctionSchema = parquet.SchemaOf(&profilev1.Function{})
   385  
   386  type pprofFunctionPersister struct{}
   387  
   388  func (*pprofFunctionPersister) Name() string { return "functions" }
   389  
   390  func (*pprofFunctionPersister) Schema() *parquet.Schema { return protoFunctionSchema }
   391  
   392  func (*pprofFunctionPersister) Deconstruct(row parquet.Row, loc *profilev1.Function) parquet.Row {
   393  	row = protoFunctionSchema.Deconstruct(row, loc)
   394  	return row
   395  }
   396  
   397  func (*pprofFunctionPersister) Reconstruct(row parquet.Row) (*profilev1.Function, error) {
   398  	var fn profilev1.Function
   399  	if err := protoFunctionSchema.Reconstruct(&fn, row); err != nil {
   400  		return nil, err
   401  	}
   402  	return &fn, nil
   403  }
   404  
   405  func TestMappingsRoundTrip(t *testing.T) {
   406  	raw := []*profilev1.Mapping{
   407  		{
   408  			Id:              7,
   409  			MemoryStart:     8,
   410  			MemoryLimit:     9,
   411  			FileOffset:      10,
   412  			Filename:        11,
   413  			BuildId:         12,
   414  			HasFunctions:    true,
   415  			HasFilenames:    false,
   416  			HasLineNumbers:  true,
   417  			HasInlineFrames: false,
   418  		},
   419  		{
   420  			Id:              1,
   421  			MemoryStart:     2,
   422  			MemoryLimit:     3,
   423  			FileOffset:      4,
   424  			Filename:        5,
   425  			BuildId:         6,
   426  			HasFunctions:    false,
   427  			HasFilenames:    true,
   428  			HasLineNumbers:  false,
   429  			HasInlineFrames: true,
   430  		},
   431  	}
   432  
   433  	mem := []InMemoryMapping{
   434  		{
   435  			Id:              7,
   436  			MemoryStart:     8,
   437  			MemoryLimit:     9,
   438  			FileOffset:      10,
   439  			Filename:        11,
   440  			BuildId:         12,
   441  			HasFunctions:    true,
   442  			HasFilenames:    false,
   443  			HasLineNumbers:  true,
   444  			HasInlineFrames: false,
   445  		},
   446  		{
   447  			Id:              1,
   448  			MemoryStart:     2,
   449  			MemoryLimit:     3,
   450  			FileOffset:      4,
   451  			Filename:        5,
   452  			BuildId:         6,
   453  			HasFunctions:    false,
   454  			HasFilenames:    true,
   455  			HasLineNumbers:  false,
   456  			HasInlineFrames: true,
   457  		},
   458  	}
   459  
   460  	var buf bytes.Buffer
   461  	require.NoError(t, new(ReadWriter[*profilev1.Mapping, *pprofMappingPersister]).WriteParquetFile(&buf, raw))
   462  	actual, err := new(ReadWriter[InMemoryMapping, MappingPersister]).ReadParquetFile(bytes.NewReader(buf.Bytes()))
   463  	require.NoError(t, err)
   464  	assert.Equal(t, mem, actual)
   465  
   466  	//	buf.Reset()
   467  	//	require.NoError(t, new(ReadWriter[*InMemoryMapping, *MappingPersister]).WriteParquetFile(&buf, mem))
   468  	//	actual, err = new(ReadWriter[*InMemoryMapping, *MappingPersister]).ReadParquetFile(bytes.NewReader(buf.Bytes()))
   469  	//	require.NoError(t, err)
   470  	//	assert.Equal(t, mem, actual)
   471  }
   472  
   473  var protoMappingSchema = parquet.SchemaOf(&profilev1.Mapping{})
   474  
   475  type pprofMappingPersister struct{}
   476  
   477  func (*pprofMappingPersister) Name() string { return "mappings" }
   478  
   479  func (*pprofMappingPersister) Schema() *parquet.Schema { return protoMappingSchema }
   480  
   481  func (*pprofMappingPersister) Deconstruct(row parquet.Row, loc *profilev1.Mapping) parquet.Row {
   482  	row = protoMappingSchema.Deconstruct(row, loc)
   483  	return row
   484  }
   485  
   486  func (*pprofMappingPersister) Reconstruct(row parquet.Row) (*profilev1.Mapping, error) {
   487  	var m profilev1.Mapping
   488  	if err := protoMappingSchema.Reconstruct(&m, row); err != nil {
   489  		return nil, err
   490  	}
   491  	return &m, nil
   492  }
   493  
   494  type ReadWriter[T any, P Persister[T]] struct{}
   495  
   496  func (r *ReadWriter[T, P]) WriteParquetFile(file io.Writer, elements []T) error {
   497  	var (
   498  		persister P
   499  		rows      = make([]parquet.Row, len(elements))
   500  	)
   501  
   502  	buffer := parquet.NewBuffer(persister.Schema())
   503  
   504  	for pos := range rows {
   505  		rows[pos] = persister.Deconstruct(rows[pos], elements[pos])
   506  	}
   507  
   508  	if _, err := buffer.WriteRows(rows); err != nil {
   509  		return err
   510  	}
   511  
   512  	writer := parquet.NewWriter(file, persister.Schema())
   513  	if _, err := parquet.CopyRows(writer, buffer.Rows()); err != nil {
   514  		return err
   515  	}
   516  
   517  	return writer.Close()
   518  }
   519  
   520  func (*ReadWriter[T, P]) ReadParquetFile(file io.ReaderAt) ([]T, error) {
   521  	var (
   522  		persister P
   523  		reader    = parquet.NewReader(file, persister.Schema())
   524  	)
   525  	defer reader.Close()
   526  
   527  	rows := make([]parquet.Row, reader.NumRows())
   528  	if _, err := reader.ReadRows(rows); err != nil {
   529  		return nil, err
   530  	}
   531  
   532  	var (
   533  		elements = make([]T, reader.NumRows())
   534  		err      error
   535  	)
   536  	for pos := range elements {
   537  		elements[pos], err = persister.Reconstruct(rows[pos])
   538  		if err != nil {
   539  			return nil, err
   540  		}
   541  	}
   542  
   543  	return elements, nil
   544  }