github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/compact_test.go (about)

     1  package phlaredb
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	_ "net/http/pprof"
     7  	"os"
     8  	"path/filepath"
     9  	"sort"
    10  	"testing"
    11  	"time"
    12  
    13  	"github.com/go-kit/log"
    14  	"github.com/oklog/ulid/v2"
    15  	"github.com/parquet-go/parquet-go"
    16  	"github.com/prometheus/common/model"
    17  	"github.com/prometheus/prometheus/storage"
    18  	"github.com/samber/lo"
    19  	"github.com/stretchr/testify/assert"
    20  	"github.com/stretchr/testify/require"
    21  
    22  	ingesterv1 "github.com/grafana/pyroscope/api/gen/proto/go/ingester/v1"
    23  	typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1"
    24  	phlaremodel "github.com/grafana/pyroscope/pkg/model"
    25  	"github.com/grafana/pyroscope/pkg/objstore/client"
    26  	"github.com/grafana/pyroscope/pkg/objstore/providers/filesystem"
    27  	"github.com/grafana/pyroscope/pkg/phlaredb/block"
    28  	"github.com/grafana/pyroscope/pkg/phlaredb/sharding"
    29  	"github.com/grafana/pyroscope/pkg/phlaredb/symdb"
    30  	"github.com/grafana/pyroscope/pkg/phlaredb/tsdb/index"
    31  	"github.com/grafana/pyroscope/pkg/pprof/testhelper"
    32  	phlarecontext "github.com/grafana/pyroscope/pkg/pyroscope/context"
    33  )
    34  
    35  func TestCompact(t *testing.T) {
    36  	ctx := context.Background()
    37  	b := newBlock(t, func() []*testhelper.ProfileBuilder {
    38  		return []*testhelper.ProfileBuilder{
    39  			testhelper.NewProfileBuilder(int64(time.Second*1)).
    40  				CPUProfile().
    41  				WithLabels(
    42  					"job", "a",
    43  				).ForStacktraceString("foo", "bar", "baz").AddSamples(1),
    44  			testhelper.NewProfileBuilder(int64(time.Second*2)).
    45  				CPUProfile().
    46  				WithLabels(
    47  					"job", "b",
    48  				).
    49  				WithAnnotations("test annotation").
    50  				ForStacktraceString("foo", "bar", "baz").AddSamples(1),
    51  			testhelper.NewProfileBuilder(int64(time.Second*3)).
    52  				CPUProfile().
    53  				WithLabels(
    54  					"job", "c",
    55  				).ForStacktraceString("foo", "bar", "baz").AddSamples(1),
    56  		}
    57  	})
    58  	dst := t.TempDir()
    59  	compacted, err := Compact(ctx, []BlockReader{b, b, b, b}, dst)
    60  	require.NoError(t, err)
    61  	require.Equal(t, uint64(3), compacted.Stats.NumProfiles)
    62  	require.Equal(t, uint64(3), compacted.Stats.NumSamples)
    63  	require.Equal(t, uint64(3), compacted.Stats.NumSeries)
    64  	require.Equal(t, model.TimeFromUnix(1), compacted.MinTime)
    65  	require.Equal(t, model.TimeFromUnix(3), compacted.MaxTime)
    66  	querier := blockQuerierFromMeta(t, dst, compacted)
    67  
    68  	matchAll := &ingesterv1.SelectProfilesRequest{
    69  		LabelSelector: "{}",
    70  		Type:          mustParseProfileSelector(t, "process_cpu:cpu:nanoseconds:cpu:nanoseconds"),
    71  		Start:         0,
    72  		End:           40000,
    73  	}
    74  	it, err := querier.SelectMatchingProfiles(ctx, matchAll)
    75  	require.NoError(t, err)
    76  	series, err := querier.MergeByLabels(ctx, it, nil, "job")
    77  	require.NoError(t, err)
    78  	require.Equal(t, []*typesv1.Series{
    79  		{
    80  			Labels: phlaremodel.LabelsFromStrings("job", "a"),
    81  			Points: []*typesv1.Point{{Value: float64(1), Timestamp: int64(1000), Annotations: []*typesv1.ProfileAnnotation{}}},
    82  		},
    83  		{
    84  			Labels: phlaremodel.LabelsFromStrings("job", "b"),
    85  			Points: []*typesv1.Point{
    86  				{Value: float64(1), Timestamp: int64(2000), Annotations: []*typesv1.ProfileAnnotation{
    87  					{Key: "throttled", Value: "test annotation"},
    88  				}},
    89  			},
    90  		},
    91  		{
    92  			Labels: phlaremodel.LabelsFromStrings("job", "c"),
    93  			Points: []*typesv1.Point{{Value: float64(1), Timestamp: int64(3000), Annotations: []*typesv1.ProfileAnnotation{}}},
    94  		},
    95  	}, series)
    96  
    97  	it, err = querier.SelectMatchingProfiles(ctx, matchAll)
    98  	require.NoError(t, err)
    99  	res, err := querier.MergeByStacktraces(ctx, it, 0)
   100  	require.NoError(t, err)
   101  	require.NotNil(t, res)
   102  
   103  	expected := new(phlaremodel.Tree)
   104  	expected.InsertStack(3, "baz", "bar", "foo")
   105  	require.Equal(t, expected.String(), res.String())
   106  }
   107  
// TestCompactWithDownsampling compacts four copies of a level-2 block and
// verifies that, besides deduplicating profiles, the compactor also writes
// downsampled profile tables (5m and 1h sums) and that querying the result
// still returns the original series, annotations, and stacktraces.
func TestCompactWithDownsampling(t *testing.T) {
	ctx := context.Background()
	b := newBlock(t, func() []*testhelper.ProfileBuilder {
		// Three profiles around the 1h mark so they land in distinct
		// 5m aggregation windows.
		return []*testhelper.ProfileBuilder{
			testhelper.NewProfileBuilder(int64(time.Hour-time.Minute)).
				CPUProfile().
				WithLabels(
					"job", "a",
				).ForStacktraceString("foo", "bar", "baz").AddSamples(1),
			testhelper.NewProfileBuilder(int64(time.Hour+time.Minute)).
				CPUProfile().
				WithLabels(
					"job", "b",
				).WithAnnotations("test annotation").
				ForStacktraceString("foo", "bar", "baz").AddSamples(1),
			testhelper.NewProfileBuilder(int64(time.Hour+6*time.Minute)).
				CPUProfile().
				WithLabels(
					"job", "c",
				).ForStacktraceString("foo", "bar", "baz").AddSamples(1),
		}
	})
	dst := t.TempDir()
	// NOTE(review): raising the source compaction level appears to be what
	// enables downsampled output (asserted below) — confirm against Compact.
	b.meta.Compaction.Level = 2
	compacted, err := Compact(ctx, []BlockReader{b, b, b, b}, dst)
	require.NoError(t, err)
	// Four identical inputs deduplicate to the original three profiles.
	require.Equal(t, uint64(3), compacted.Stats.NumProfiles)
	require.Equal(t, uint64(3), compacted.Stats.NumSamples)
	require.Equal(t, uint64(3), compacted.Stats.NumSeries)
	require.Equal(t, model.Time((time.Hour - time.Minute).Milliseconds()), compacted.MinTime)
	require.Equal(t, model.Time((time.Hour + 6*time.Minute).Milliseconds()), compacted.MaxTime)

	// The downsampled tables must exist in the block meta and be non-empty.
	for _, f := range []*block.File{
		compacted.FileByRelPath("profiles_5m_sum.parquet"),
		compacted.FileByRelPath("profiles_1h_sum.parquet"),
	} {
		require.NotNil(t, f)
		assert.NotZero(t, f.SizeBytes)
	}

	querier := blockQuerierFromMeta(t, dst, compacted)
	matchAll := &ingesterv1.SelectProfilesRequest{
		LabelSelector: "{}",
		Type:          mustParseProfileSelector(t, "process_cpu:cpu:nanoseconds:cpu:nanoseconds"),
		Start:         0,
		// End just before the next 5m boundary after the last profile.
		End: (time.Hour + 7*time.Minute - time.Millisecond).Milliseconds(),
	}
	it, err := querier.SelectMatchingProfiles(ctx, matchAll)
	require.NoError(t, err)
	series, err := querier.MergeByLabels(ctx, it, nil, "job")
	require.NoError(t, err)
	require.Equal(t, []*typesv1.Series{
		{
			Labels: phlaremodel.LabelsFromStrings("job", "a"),
			Points: []*typesv1.Point{{Value: float64(1), Timestamp: (time.Hour - time.Minute).Milliseconds(), Annotations: []*typesv1.ProfileAnnotation{}}},
		},
		{
			Labels: phlaremodel.LabelsFromStrings("job", "b"),
			Points: []*typesv1.Point{{Value: float64(1), Timestamp: (time.Hour + time.Minute).Milliseconds(), Annotations: []*typesv1.ProfileAnnotation{{Key: "throttled", Value: "test annotation"}}}},
		},
		{
			Labels: phlaremodel.LabelsFromStrings("job", "c"),
			Points: []*typesv1.Point{{Value: float64(1), Timestamp: (time.Hour + 6*time.Minute).Milliseconds(), Annotations: []*typesv1.ProfileAnnotation{}}},
		},
	}, series)

	// Merging by stacktraces (from pre-selected profiles) yields the
	// single shared stack with all three samples.
	it, err = querier.SelectMatchingProfiles(ctx, matchAll)
	require.NoError(t, err)
	res, err := querier.MergeByStacktraces(ctx, it, 0)
	require.NoError(t, err)
	require.NotNil(t, res)

	expected := new(phlaremodel.Tree)
	expected.InsertStack(3, "baz", "bar", "foo")
	require.Equal(t, expected.String(), res.String())

	// The select+merge shortcut must agree with the two-step path above.
	res, err = querier.SelectMergeByStacktraces(ctx, matchAll, 0)
	require.NoError(t, err)
	require.NotNil(t, res)
	require.Equal(t, expected.String(), res.String())
	// DeleteLabelValues returns true only if that label value was observed:
	// the queries above must have touched the downsampled tables and the
	// full-resolution table, but never an empty table name. Order matters —
	// each label value can only be deleted (and report true) once.
	assert.False(t, querier.metrics.profileTableAccess.DeleteLabelValues(""))
	assert.True(t, querier.metrics.profileTableAccess.DeleteLabelValues("profiles_5m_sum.parquet"))
	assert.True(t, querier.metrics.profileTableAccess.DeleteLabelValues("profiles_1h_sum.parquet"))
	assert.True(t, querier.metrics.profileTableAccess.DeleteLabelValues("profiles.parquet"))
}
   193  
// TestCompactWithSplitting compacts two overlapping blocks (duplicated, so
// four readers total) while splitting the output into 16 shards by series
// fingerprint. It verifies the shard assignment, that the union of all shard
// blocks contains every series/timestamp exactly once, and that individual
// shards hold disjoint subsets of the series.
func TestCompactWithSplitting(t *testing.T) {
	ctx := context.Background()

	// b1: series {job=a} over [1s,10s] and {job=b} over [11s,20s].
	b1 := newBlock(t, func() []*testhelper.ProfileBuilder {
		return append(
			profileSeriesGenerator(t, time.Unix(1, 0), time.Unix(10, 0), time.Second, "job", "a"),
			profileSeriesGenerator(t, time.Unix(11, 0), time.Unix(20, 0), time.Second, "job", "b")...,
		)
	})
	// b2: series {job=c}, {job=d}, plus duplicates of b1's {job=a} and
	// {job=b} — the duplicates must be deduplicated by compaction.
	b2 := newBlock(t, func() []*testhelper.ProfileBuilder {
		return append(
			append(
				append(
					profileSeriesGenerator(t, time.Unix(1, 0), time.Unix(10, 0), time.Second, "job", "c"),
					profileSeriesGenerator(t, time.Unix(11, 0), time.Unix(20, 0), time.Second, "job", "d")...,
				), profileSeriesGenerator(t, time.Unix(1, 0), time.Unix(10, 0), time.Second, "job", "a")...,
			),
			profileSeriesGenerator(t, time.Unix(11, 0), time.Unix(20, 0), time.Second, "job", "b")...,
		)
	})
	dst := t.TempDir()
	compacted, err := CompactWithSplitting(ctx, CompactWithSplittingOpts{
		Src:                []BlockReader{b1, b2, b2, b1},
		Dst:                dst,
		SplitCount:         16,
		StageSize:          8,
		SplitBy:            SplitByFingerprint,
		DownsamplerEnabled: true,
		Logger:             log.NewNopLogger(),
	})
	require.NoError(t, err)

	// No shared symdb directory may be left behind in the destination.
	require.NoDirExists(t, filepath.Join(dst, symdb.DefaultDirName))

	// 4 shards one per series: each of the four series hashes to its own
	// shard out of the 16 possible.
	require.Equal(t, 4, len(compacted))
	require.Equal(t, "1_of_16", compacted[0].Labels[sharding.CompactorShardIDLabel])
	require.Equal(t, "6_of_16", compacted[1].Labels[sharding.CompactorShardIDLabel])
	require.Equal(t, "7_of_16", compacted[2].Labels[sharding.CompactorShardIDLabel])
	require.Equal(t, "14_of_16", compacted[3].Labels[sharding.CompactorShardIDLabel])

	require.Equal(t, model.TimeFromUnix(1), compacted[1].MinTime)
	require.Equal(t, model.TimeFromUnix(20), compacted[1].MaxTime)

	// We first verify we have all series and timestamps across querying all blocks.
	queriers := make(Queriers, len(compacted))
	for i, blk := range compacted {
		queriers[i] = blockQuerierFromMeta(t, dst, blk)
	}

	err = queriers.Open(context.Background())
	require.NoError(t, err)
	matchAll := &ingesterv1.SelectProfilesRequest{
		LabelSelector: "{}",
		Type:          mustParseProfileSelector(t, "process_cpu:cpu:nanoseconds:cpu:nanoseconds"),
		Start:         0,
		End:           40000,
	}
	it, err := queriers.SelectMatchingProfiles(context.Background(), matchAll)
	require.NoError(t, err)

	// Accumulate, per series fingerprint, its labels and every timestamp
	// seen — duplicates from the overlapping inputs would show up as
	// repeated timestamps here.
	seriesMap := make(map[model.Fingerprint]lo.Tuple2[phlaremodel.Labels, []model.Time])
	for it.Next() {
		r := it.At()
		seriesMap[r.Fingerprint()] = lo.T2(r.Labels().WithoutPrivateLabels(), append(seriesMap[r.Fingerprint()].B, r.Timestamp()))
	}
	require.NoError(t, it.Err())
	require.NoError(t, it.Close())
	series := lo.Values(seriesMap)
	// Sort by labels so the expectation below is deterministic (map
	// iteration order is random).
	sort.Slice(series, func(i, j int) bool {
		return phlaremodel.CompareLabelPairs(series[i].A, series[j].A) < 0
	})
	require.Equal(t, []lo.Tuple2[phlaremodel.Labels, []model.Time]{
		lo.T2(phlaremodel.LabelsFromStrings("job", "a"),
			generateTimes(t, model.TimeFromUnix(1), model.TimeFromUnix(10)),
		),
		lo.T2(phlaremodel.LabelsFromStrings("job", "b"),
			generateTimes(t, model.TimeFromUnix(11), model.TimeFromUnix(20)),
		),
		lo.T2(phlaremodel.LabelsFromStrings("job", "c"),
			generateTimes(t, model.TimeFromUnix(1), model.TimeFromUnix(10)),
		),
		lo.T2(phlaremodel.LabelsFromStrings("job", "d"),
			generateTimes(t, model.TimeFromUnix(11), model.TimeFromUnix(20)),
		),
	}, series)

	// Then we query 2 different shards and verify we have a subset of series.
	it, err = queriers[0].SelectMatchingProfiles(ctx, matchAll)
	require.NoError(t, err)
	seriesResult, err := queriers[0].MergeByLabels(context.Background(), it, nil, "job")
	require.NoError(t, err)
	require.Equal(t,
		[]*typesv1.Series{
			{
				Labels: phlaremodel.LabelsFromStrings("job", "a"),
				Points: generatePoints(t, model.TimeFromUnix(1), model.TimeFromUnix(10)),
			},
		}, seriesResult)

	it, err = queriers[1].SelectMatchingProfiles(ctx, matchAll)
	require.NoError(t, err)
	seriesResult, err = queriers[1].MergeByLabels(context.Background(), it, nil, "job")
	require.NoError(t, err)
	require.Equal(t,
		[]*typesv1.Series{
			{
				Labels: phlaremodel.LabelsFromStrings("job", "b"),
				Points: generatePoints(t, model.TimeFromUnix(11), model.TimeFromUnix(20)),
			},
		}, seriesResult)

	// Finally test some stacktraces resolution.
	it, err = queriers[1].SelectMatchingProfiles(ctx, matchAll)
	require.NoError(t, err)
	res, err := queriers[1].MergeByStacktraces(ctx, it, 0)
	require.NoError(t, err)

	// Shard 1 holds only {job=b}: 10 profiles with 1 sample each on the
	// same stack.
	expected := new(phlaremodel.Tree)
	expected.InsertStack(10, "baz", "bar", "foo")
	require.Equal(t, expected.String(), res.String())
}
   316  
   317  // nolint:unparam
   318  func profileSeriesGenerator(t *testing.T, from, through time.Time, interval time.Duration, lbls ...string) []*testhelper.ProfileBuilder {
   319  	t.Helper()
   320  	var builders []*testhelper.ProfileBuilder
   321  	for ts := from; ts.Before(through) || ts.Equal(through); ts = ts.Add(interval) {
   322  		builders = append(builders,
   323  			testhelper.NewProfileBuilder(ts.UnixNano()).
   324  				CPUProfile().
   325  				WithLabels(
   326  					lbls...,
   327  				).ForStacktraceString("foo", "bar", "baz").AddSamples(1))
   328  	}
   329  	return builders
   330  }
   331  
   332  func generatePoints(t *testing.T, from, through model.Time) []*typesv1.Point {
   333  	t.Helper()
   334  	var points []*typesv1.Point
   335  	for ts := from; ts.Before(through) || ts.Equal(through); ts = ts.Add(time.Second) {
   336  		points = append(points, &typesv1.Point{Timestamp: int64(ts), Value: 1, Annotations: []*typesv1.ProfileAnnotation{}})
   337  	}
   338  	return points
   339  }
   340  
   341  func generateTimes(t *testing.T, from, through model.Time) []model.Time {
   342  	t.Helper()
   343  	var times []model.Time
   344  	for ts := from; ts.Before(through) || ts.Equal(through); ts = ts.Add(time.Second) {
   345  		times = append(times, ts)
   346  	}
   347  	return times
   348  }
   349  
   350  func TestProfileRowIterator(t *testing.T) {
   351  	b := newBlock(t, func() []*testhelper.ProfileBuilder {
   352  		return []*testhelper.ProfileBuilder{
   353  			testhelper.NewProfileBuilder(int64(1)).
   354  				CPUProfile().
   355  				WithLabels(
   356  					"job", "a",
   357  				).ForStacktraceString("foo", "bar", "baz").AddSamples(1),
   358  			testhelper.NewProfileBuilder(int64(2)).
   359  				CPUProfile().
   360  				WithLabels(
   361  					"job", "b",
   362  				).ForStacktraceString("foo", "bar", "baz").AddSamples(1),
   363  			testhelper.NewProfileBuilder(int64(3)).
   364  				CPUProfile().
   365  				WithLabels(
   366  					"job", "c",
   367  				).ForStacktraceString("foo", "bar", "baz").AddSamples(1),
   368  		}
   369  	})
   370  
   371  	it, err := newProfileRowIterator(b)
   372  	require.NoError(t, err)
   373  
   374  	assert.True(t, it.Next())
   375  	require.Equal(t, it.At().labels.WithoutPrivateLabels(), phlaremodel.Labels{
   376  		&typesv1.LabelPair{Name: "job", Value: "a"},
   377  	})
   378  	require.Equal(t, it.At().timeNanos, int64(1))
   379  
   380  	assert.True(t, it.Next())
   381  	require.Equal(t, it.At().labels.WithoutPrivateLabels(), phlaremodel.Labels{
   382  		&typesv1.LabelPair{Name: "job", Value: "b"},
   383  	})
   384  	require.Equal(t, it.At().timeNanos, int64(2))
   385  
   386  	assert.True(t, it.Next())
   387  	require.Equal(t, it.At().labels.WithoutPrivateLabels(), phlaremodel.Labels{
   388  		&typesv1.LabelPair{Name: "job", Value: "c"},
   389  	})
   390  	require.Equal(t, it.At().timeNanos, int64(3))
   391  
   392  	assert.False(t, it.Next())
   393  	require.NoError(t, it.Err())
   394  	require.NoError(t, it.Close())
   395  }
   396  
// TestMergeRowProfileIterator verifies that newMergeRowProfileIterator merges
// profile rows from several blocks into a single stream ordered by timestamp,
// dropping exact duplicates (same series, same timestamp) that appear in more
// than one block.
func TestMergeRowProfileIterator(t *testing.T) {
	// profile is a compact fixture: just the fields the merge is keyed on.
	type profile struct {
		timeNanos int64
		labels    phlaremodel.Labels
	}

	a, b, c := phlaremodel.Labels{
		&typesv1.LabelPair{Name: "job", Value: "a"},
	}, phlaremodel.Labels{
		&typesv1.LabelPair{Name: "job", Value: "b"},
	}, phlaremodel.Labels{
		&typesv1.LabelPair{Name: "job", Value: "c"},
	}

	for _, tc := range []struct {
		name     string
		in       [][]profile // one inner slice per source block
		expected []profile   // merged, deduplicated output
	}{
		{
			// Four blocks with identical content collapse to one copy.
			name: "only duplicates",
			in: [][]profile{
				{
					{timeNanos: 1, labels: a}, {timeNanos: 2, labels: b}, {timeNanos: 3, labels: c},
				},
				{
					{timeNanos: 1, labels: a}, {timeNanos: 2, labels: b}, {timeNanos: 3, labels: c},
				},
				{
					{timeNanos: 1, labels: a}, {timeNanos: 2, labels: b}, {timeNanos: 3, labels: c},
				},
				{
					{timeNanos: 1, labels: a}, {timeNanos: 2, labels: b}, {timeNanos: 3, labels: c},
				},
			},
			expected: []profile{
				{timeNanos: 1, labels: a}, {timeNanos: 2, labels: b}, {timeNanos: 3, labels: c},
			},
		},
		{
			// Blocks with partial overlap: union is returned, overlap deduplicated.
			name: "missing some",
			in: [][]profile{
				{
					{timeNanos: 2, labels: b}, {timeNanos: 3, labels: c}, {timeNanos: 4, labels: c},
				},
				{
					{timeNanos: 1, labels: a},
				},
				{
					{timeNanos: 2, labels: b}, {timeNanos: 3, labels: c},
				},
			},
			expected: []profile{
				{timeNanos: 1, labels: a}, {timeNanos: 2, labels: b}, {timeNanos: 3, labels: c}, {timeNanos: 4, labels: c},
			},
		},
		{
			// Disjoint blocks: output is simply the timestamp-ordered merge.
			name: "no duplicates",
			in: [][]profile{
				{
					{timeNanos: 2, labels: b},
				},
				{
					{timeNanos: 1, labels: a},
				},
				{
					{timeNanos: 3, labels: c},
				},
			},
			expected: []profile{
				{timeNanos: 1, labels: a}, {timeNanos: 2, labels: b}, {timeNanos: 3, labels: c},
			},
		},
	} {
		t.Run(tc.name, func(t *testing.T) {
			// Materialize each fixture slice as a real on-disk block.
			blocks := make([]BlockReader, len(tc.in))
			for i, profiles := range tc.in {
				blocks[i] = newBlock(t, func() []*testhelper.ProfileBuilder {
					var builders []*testhelper.ProfileBuilder
					for _, p := range profiles {
						prof := testhelper.NewProfileBuilder(p.timeNanos).
							CPUProfile().ForStacktraceString("foo").AddSamples(1)
						for _, l := range p.labels {
							prof.WithLabels(l.Name, l.Value)
						}
						builders = append(builders, prof)
					}
					return builders
				})
			}
			it, err := newMergeRowProfileIterator(blocks)
			require.NoError(t, err)
			actual := []profile{}
			for it.Next() {
				actual = append(actual, profile{
					timeNanos: it.At().timeNanos,
					labels:    it.At().labels.WithoutPrivateLabels(),
				})
				// Each row's fingerprint must match the hash of its full label set.
				require.Equal(t, model.Fingerprint(it.At().labels.Hash()), it.At().fp)
			}
			require.NoError(t, it.Err())
			require.NoError(t, it.Close())
			require.Equal(t, tc.expected, actual)
		})
	}
}
   503  
// TestSeriesRewriter verifies the index rewriter: profiles spanning three
// series must be assigned dense, ascending series indexes (0,1,2), and the
// TSDB index written on Close must contain one chunk per series covering
// that series' min/max timestamps.
func TestSeriesRewriter(t *testing.T) {
	type profile struct {
		timeNanos int64
		labels    phlaremodel.Labels
	}

	// Three series: job=a with 3 profiles, job=b with 1, job=c with 2.
	in := []profile{
		{1, phlaremodel.LabelsFromStrings("job", "a")},
		{2, phlaremodel.LabelsFromStrings("job", "a")},
		{3, phlaremodel.LabelsFromStrings("job", "a")},
		{2, phlaremodel.LabelsFromStrings("job", "b")},
		{1, phlaremodel.LabelsFromStrings("job", "c")},
		{2, phlaremodel.LabelsFromStrings("job", "c")},
	}

	blk := newBlock(t, func() []*testhelper.ProfileBuilder {
		var builders []*testhelper.ProfileBuilder
		for _, p := range in {
			prof := testhelper.NewProfileBuilder(p.timeNanos).
				CPUProfile().ForStacktraceString("foo").AddSamples(1)
			for _, l := range p.labels {
				prof.WithLabels(l.Name, l.Value)
			}
			builders = append(builders, prof)
		}
		return builders
	})
	rows, err := newProfileRowIterator(blk)
	require.NoError(t, err)
	path := t.TempDir()
	filePath := filepath.Join(path, block.IndexFilename)
	idxw := newIndexRewriter(path)
	seriesIdx := []uint32{}
	// Feed every row through the rewriter and record the series index it
	// assigns to each one.
	for rows.Next() {
		r := rows.At()
		require.NoError(t, idxw.ReWriteRow(r))
		seriesIdx = append(seriesIdx, r.row.SeriesIndex())
	}
	require.NoError(t, rows.Err())
	require.NoError(t, rows.Close())

	// Rows from the same series share an index; indexes are dense and ordered.
	require.Equal(t, []uint32{0, 0, 0, 1, 2, 2}, seriesIdx)

	// Close flushes the rewritten TSDB index to disk.
	err = idxw.Close(context.Background())
	require.NoError(t, err)

	idxr, err := index.NewFileReader(filePath)
	require.NoError(t, err)
	defer idxr.Close()

	k, v := index.AllPostingsKey()
	p, err := idxr.Postings(k, nil, v)
	require.NoError(t, err)

	chunks := make([]index.ChunkMeta, 1)
	var lbs phlaremodel.Labels

	// Series 0: job=a, timestamps 1..3. The postings iteration below is
	// strictly ordered, so these three stanzas must stay in sequence.
	require.True(t, p.Next())
	fp, err := idxr.Series(p.At(), &lbs, &chunks)
	require.NoError(t, err)
	require.Equal(t, model.Fingerprint(lbs.Hash()), model.Fingerprint(fp))
	require.Equal(t, lbs.WithoutPrivateLabels(), phlaremodel.LabelsFromStrings("job", "a"))
	require.Equal(t, []index.ChunkMeta{{
		SeriesIndex: 0,
		MinTime:     int64(1),
		MaxTime:     int64(3),
	}}, chunks)

	// Series 1: job=b, single timestamp 2.
	require.True(t, p.Next())
	fp, err = idxr.Series(p.At(), &lbs, &chunks)
	require.NoError(t, err)
	require.Equal(t, model.Fingerprint(lbs.Hash()), model.Fingerprint(fp))
	require.Equal(t, lbs.WithoutPrivateLabels(), phlaremodel.LabelsFromStrings("job", "b"))
	require.Equal(t, []index.ChunkMeta{{
		SeriesIndex: 1,
		MinTime:     int64(2),
		MaxTime:     int64(2),
	}}, chunks)

	// Series 2: job=c, timestamps 1..2.
	require.True(t, p.Next())
	fp, err = idxr.Series(p.At(), &lbs, &chunks)
	require.NoError(t, err)
	require.Equal(t, model.Fingerprint(lbs.Hash()), model.Fingerprint(fp))
	require.Equal(t, lbs.WithoutPrivateLabels(), phlaremodel.LabelsFromStrings("job", "c"))
	require.Equal(t, []index.ChunkMeta{{
		SeriesIndex: 2,
		MinTime:     int64(1),
		MaxTime:     int64(2),
	}}, chunks)
}
   594  
   595  func TestCompactOldBlock(t *testing.T) {
   596  	meta, err := block.ReadMetaFromDir("./testdata/01HD3X85G9BGAG4S3TKPNMFG4Z")
   597  	require.NoError(t, err)
   598  	dst := t.TempDir()
   599  	ctx := context.Background()
   600  	t.Log(meta)
   601  	bkt, err := client.NewBucket(ctx, client.Config{
   602  		StorageBackendConfig: client.StorageBackendConfig{
   603  			Backend: client.Filesystem,
   604  			Filesystem: filesystem.Config{
   605  				Directory: "./testdata/",
   606  			},
   607  		},
   608  	}, "test")
   609  	require.NoError(t, err)
   610  	br := NewSingleBlockQuerierFromMeta(context.Background(), bkt, meta)
   611  	require.NoError(t, br.Open(ctx))
   612  	_, err = CompactWithSplitting(ctx, CompactWithSplittingOpts{
   613  		Src:                []BlockReader{br},
   614  		Dst:                dst,
   615  		SplitCount:         2,
   616  		StageSize:          0,
   617  		SplitBy:            SplitByFingerprint,
   618  		DownsamplerEnabled: true,
   619  	})
   620  	require.NoError(t, err)
   621  }
   622  
   623  func TestFlushMeta(t *testing.T) {
   624  	b := newBlock(t, func() []*testhelper.ProfileBuilder {
   625  		return []*testhelper.ProfileBuilder{
   626  			testhelper.NewProfileBuilder(int64(time.Second*1)).
   627  				CPUProfile().
   628  				WithLabels(
   629  					"job", "a",
   630  				).ForStacktraceString("foo", "bar", "baz").AddSamples(1),
   631  			testhelper.NewProfileBuilder(int64(time.Second*2)).
   632  				CPUProfile().
   633  				WithLabels(
   634  					"job", "b",
   635  				).ForStacktraceString("foo", "bar", "baz").AddSamples(1),
   636  			testhelper.NewProfileBuilder(int64(time.Second*3)).
   637  				CPUProfile().
   638  				WithLabels(
   639  					"job", "c",
   640  				).ForStacktraceString("foo", "bar", "baz").AddSamples(1),
   641  		}
   642  	})
   643  
   644  	require.Equal(t, []ulid.ULID{b.Meta().ULID}, b.Meta().Compaction.Sources)
   645  	require.Equal(t, 1, b.Meta().Compaction.Level)
   646  	require.Equal(t, false, b.Meta().Compaction.Deletable)
   647  	require.Equal(t, false, b.Meta().Compaction.Failed)
   648  	require.Equal(t, []string(nil), b.Meta().Compaction.Hints)
   649  	require.Equal(t, []block.BlockDesc(nil), b.Meta().Compaction.Parents)
   650  	require.Equal(t, block.MetaVersion3, b.Meta().Version)
   651  	require.Equal(t, model.Time(1000), b.Meta().MinTime)
   652  	require.Equal(t, model.Time(3000), b.Meta().MaxTime)
   653  	require.Equal(t, uint64(3), b.Meta().Stats.NumSeries)
   654  	require.Equal(t, uint64(3), b.Meta().Stats.NumSamples)
   655  	require.Equal(t, uint64(3), b.Meta().Stats.NumProfiles)
   656  	require.Len(t, b.Meta().Files, 8)
   657  	require.Equal(t, "index.tsdb", b.Meta().Files[0].RelPath)
   658  	require.Equal(t, "profiles.parquet", b.Meta().Files[1].RelPath)
   659  	require.Equal(t, "symbols/functions.parquet", b.Meta().Files[2].RelPath)
   660  	require.Equal(t, "symbols/index.symdb", b.Meta().Files[3].RelPath)
   661  	require.Equal(t, "symbols/locations.parquet", b.Meta().Files[4].RelPath)
   662  	require.Equal(t, "symbols/mappings.parquet", b.Meta().Files[5].RelPath)
   663  	require.Equal(t, "symbols/stacktraces.symdb", b.Meta().Files[6].RelPath)
   664  	require.Equal(t, "symbols/strings.parquet", b.Meta().Files[7].RelPath)
   665  }
   666  
   667  func newBlock(t testing.TB, generator func() []*testhelper.ProfileBuilder) *singleBlockQuerier {
   668  	t.Helper()
   669  	dir := t.TempDir()
   670  	ctx := phlarecontext.WithLogger(context.Background(), log.NewNopLogger())
   671  	h, err := NewHead(ctx, Config{
   672  		DataPath:         dir,
   673  		MaxBlockDuration: 24 * time.Hour,
   674  		Parquet: &ParquetConfig{
   675  			MaxBufferRowCount: 10,
   676  		},
   677  	}, NoLimit)
   678  	require.NoError(t, err)
   679  
   680  	// ingest.
   681  	for _, p := range generator() {
   682  		require.NoError(t, h.Ingest(ctx, p.Profile, p.UUID, p.Annotations, p.Labels...))
   683  	}
   684  
   685  	require.NoError(t, h.Flush(ctx))
   686  	require.NoError(t, h.Move())
   687  
   688  	bkt, err := client.NewBucket(ctx, client.Config{
   689  		StorageBackendConfig: client.StorageBackendConfig{
   690  			Backend: client.Filesystem,
   691  			Filesystem: filesystem.Config{
   692  				Directory: dir,
   693  			},
   694  		},
   695  		Prefix: "local",
   696  	}, "test")
   697  	require.NoError(t, err)
   698  	metaMap, err := block.ListBlocks(filepath.Join(dir, PathLocal), time.Time{})
   699  	require.NoError(t, err)
   700  	require.Len(t, metaMap, 1)
   701  	var meta *block.Meta
   702  	for _, m := range metaMap {
   703  		meta = m
   704  	}
   705  	blk := NewSingleBlockQuerierFromMeta(ctx, bkt, meta)
   706  	require.NoError(t, blk.Open(ctx))
   707  	return blk
   708  }
   709  
   710  func blockQuerierFromMeta(t *testing.T, dir string, m block.Meta) *singleBlockQuerier {
   711  	t.Helper()
   712  	ctx := context.Background()
   713  	bkt, err := client.NewBucket(ctx, client.Config{
   714  		StorageBackendConfig: client.StorageBackendConfig{
   715  			Backend: client.Filesystem,
   716  			Filesystem: filesystem.Config{
   717  				Directory: dir,
   718  			},
   719  		},
   720  		Prefix: "",
   721  	}, "test")
   722  	require.NoError(t, err)
   723  	blk := NewSingleBlockQuerierFromMeta(ctx, bkt, &m)
   724  	require.NoError(t, blk.Open(ctx))
   725  	return blk
   726  }
   727  
// TestCompactMetas checks how compactMetas combines source block metas:
// the result's time range spans all inputs, the compaction level is one
// above the highest input level, sources and parents list every input, and
// labels are merged across inputs.
func TestCompactMetas(t *testing.T) {
	actual := compactMetas([]block.Meta{
		{
			ULID:    ulid.MustParse("00000000000000000000000001"),
			MinTime: model.TimeFromUnix(0),
			MaxTime: model.TimeFromUnix(100),
			Compaction: block.BlockMetaCompaction{
				Level:   1,
				Sources: []ulid.ULID{ulid.MustParse("00000000000000000000000001")},
			},
			Labels: map[string]string{"foo": "bar"},
		},
		{
			ULID:    ulid.MustParse("00000000000000000000000002"),
			MinTime: model.TimeFromUnix(50),
			MaxTime: model.TimeFromUnix(100),
			Compaction: block.BlockMetaCompaction{
				Level:   0,
				Sources: []ulid.ULID{ulid.MustParse("00000000000000000000000002")},
			},
			Labels: map[string]string{"bar": "buzz"},
		},
		{
			ULID:    ulid.MustParse("00000000000000000000000003"),
			MinTime: model.TimeFromUnix(50),
			MaxTime: model.TimeFromUnix(200),
			// Highest input level (3) drives the output level (4) below.
			Compaction: block.BlockMetaCompaction{
				Level:   3,
				Sources: []ulid.ULID{ulid.MustParse("00000000000000000000000003")},
			},
		},
	}...)
	// Labels from all inputs are merged into one map.
	labels := map[string]string{"foo": "bar", "bar": "buzz"}
	require.Equal(t, model.TimeFromUnix(0), actual.MinTime)
	require.Equal(t, model.TimeFromUnix(200), actual.MaxTime)
	require.Equal(t, block.BlockMetaCompaction{
		Level: 4,
		Sources: []ulid.ULID{
			ulid.MustParse("00000000000000000000000001"),
			ulid.MustParse("00000000000000000000000002"),
			ulid.MustParse("00000000000000000000000003"),
		},
		// Parents record each input's ULID and time range (in milliseconds).
		Parents: []block.BlockDesc{
			{
				ULID:    ulid.MustParse("00000000000000000000000001"),
				MinTime: 0,
				MaxTime: 100000,
			},
			{
				ULID:    ulid.MustParse("00000000000000000000000002"),
				MinTime: 50000,
				MaxTime: 100000,
			},
			{
				ULID:    ulid.MustParse("00000000000000000000000003"),
				MinTime: 50000,
				MaxTime: 200000,
			},
		},
	}, actual.Compaction)
	require.Equal(t, labels, actual.Labels)
	// The output block is attributed to the compactor.
	require.Equal(t, block.CompactorSource, actual.Source)
}
   791  
   792  func TestMetaFilesFromDir(t *testing.T) {
   793  	dst := t.TempDir()
   794  	generateParquetFile(t, filepath.Join(dst, "foo.parquet"))
   795  	generateParquetFile(t, filepath.Join(dst, "symbols", "bar.parquet"))
   796  	generateFile(t, filepath.Join(dst, "symbols", "index.symdb"), 100)
   797  	generateFile(t, filepath.Join(dst, "symbols", "stacktraces.symdb"), 200)
   798  	generateIndexFile(t, dst)
   799  	actual, err := metaFilesFromDir(dst)
   800  
   801  	require.NoError(t, err)
   802  	require.Equal(t, 5, len(actual))
   803  	require.Equal(t, []block.File{
   804  		{
   805  			Parquet: &block.ParquetFile{
   806  				NumRows:      100,
   807  				NumRowGroups: 10,
   808  			},
   809  			RelPath:   "foo.parquet",
   810  			SizeBytes: fileSize(t, filepath.Join(dst, "foo.parquet")),
   811  		},
   812  		{
   813  			RelPath:   block.IndexFilename,
   814  			SizeBytes: fileSize(t, filepath.Join(dst, block.IndexFilename)),
   815  			TSDB: &block.TSDBFile{
   816  				NumSeries: 3,
   817  			},
   818  		},
   819  		{
   820  			Parquet: &block.ParquetFile{
   821  				NumRows:      100,
   822  				NumRowGroups: 10,
   823  			},
   824  			RelPath:   filepath.Join("symbols", "bar.parquet"),
   825  			SizeBytes: fileSize(t, filepath.Join(dst, "symbols", "bar.parquet")),
   826  		},
   827  		{
   828  			RelPath:   filepath.Join("symbols", "index.symdb"),
   829  			SizeBytes: fileSize(t, filepath.Join(dst, "symbols", "index.symdb")),
   830  		},
   831  		{
   832  			RelPath:   filepath.Join("symbols", "stacktraces.symdb"),
   833  			SizeBytes: fileSize(t, filepath.Join(dst, "symbols", "stacktraces.symdb")),
   834  		},
   835  	}, actual)
   836  }
   837  
   838  func fileSize(t *testing.T, path string) uint64 {
   839  	t.Helper()
   840  	fi, err := os.Stat(path)
   841  	require.NoError(t, err)
   842  	return uint64(fi.Size())
   843  }
   844  
   845  func generateFile(t *testing.T, path string, size int) {
   846  	t.Helper()
   847  	require.NoError(t, os.MkdirAll(filepath.Dir(path), 0o755))
   848  	f, err := os.Create(path)
   849  	require.NoError(t, err)
   850  	defer f.Close()
   851  	require.NoError(t, f.Truncate(int64(size)))
   852  }
   853  
   854  func generateIndexFile(t *testing.T, dir string) {
   855  	t.Helper()
   856  	filePath := filepath.Join(dir, block.IndexFilename)
   857  	idxw, err := index.NewWriter(context.Background(), filePath)
   858  	require.NoError(t, err)
   859  	require.NoError(t, idxw.AddSymbol("a"))
   860  	require.NoError(t, idxw.AddSymbol("b"))
   861  	require.NoError(t, idxw.AddSymbol("c"))
   862  	addSeries(t, idxw, 0, phlaremodel.Labels{
   863  		&typesv1.LabelPair{Name: "a", Value: "b"},
   864  	})
   865  	addSeries(t, idxw, 1, phlaremodel.Labels{
   866  		&typesv1.LabelPair{Name: "a", Value: "c"},
   867  	})
   868  	addSeries(t, idxw, 2, phlaremodel.Labels{
   869  		&typesv1.LabelPair{Name: "b", Value: "a"},
   870  	})
   871  	require.NoError(t, idxw.Close())
   872  }
   873  
   874  func addSeries(t *testing.T, idxw *index.Writer, idx int, labels phlaremodel.Labels) {
   875  	t.Helper()
   876  	require.NoError(t, idxw.AddSeries(storage.SeriesRef(idx), labels, model.Fingerprint(labels.Hash()), index.ChunkMeta{SeriesIndex: uint32(idx)}))
   877  }
   878  
   879  func generateParquetFile(t *testing.T, path string) {
   880  	t.Helper()
   881  	require.NoError(t, os.MkdirAll(filepath.Dir(path), 0o755))
   882  	file, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0o644)
   883  	require.NoError(t, err)
   884  	defer file.Close()
   885  
   886  	writer := parquet.NewGenericWriter[struct{ Name string }](file, parquet.MaxRowsPerRowGroup(10))
   887  	defer writer.Close()
   888  	for i := 0; i < 100; i++ {
   889  		_, err := writer.Write([]struct{ Name string }{
   890  			{Name: fmt.Sprintf("name-%d", i)},
   891  		})
   892  		require.NoError(t, err)
   893  	}
   894  }
   895  
   896  func Test_SplitStages(t *testing.T) {
   897  	tests := []struct {
   898  		n, s   int
   899  		result [][]int
   900  	}{
   901  		{12, 3, [][]int{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, 10, 11}}},
   902  		{7, 3, [][]int{{0, 1, 2}, {3, 4, 5}, {6}}},
   903  		{10, 2, [][]int{{0, 1}, {2, 3}, {4, 5}, {6, 7}, {8, 9}}},
   904  		{5, 5, [][]int{{0, 1, 2, 3, 4}}},
   905  	}
   906  
   907  	for _, test := range tests {
   908  		assert.Equal(t, test.result, splitStages(test.n, test.s))
   909  	}
   910  }
   911  
   912  func Benchmark_CompactSplit(b *testing.B) {
   913  	ctx := phlarecontext.WithLogger(context.Background(), log.NewNopLogger())
   914  
   915  	bkt, err := client.NewBucket(ctx, client.Config{
   916  		StorageBackendConfig: client.StorageBackendConfig{
   917  			Backend: client.Filesystem,
   918  			Filesystem: filesystem.Config{
   919  				Directory: "./testdata/",
   920  			},
   921  		},
   922  		Prefix: "",
   923  	}, "test")
   924  	require.NoError(b, err)
   925  	meta, err := block.ReadMetaFromDir("./testdata/01HHYG6245NWHZWVP27V8WJRT7")
   926  	require.NoError(b, err)
   927  	bl := NewSingleBlockQuerierFromMeta(ctx, bkt, meta)
   928  	require.NoError(b, bl.Open(ctx))
   929  	dst := b.TempDir()
   930  
   931  	b.ResetTimer()
   932  	b.ReportAllocs()
   933  	for i := 0; i < b.N; i++ {
   934  		_, err = CompactWithSplitting(ctx, CompactWithSplittingOpts{
   935  			Src:                []BlockReader{bl},
   936  			Dst:                dst,
   937  			SplitCount:         32,
   938  			StageSize:          32,
   939  			SplitBy:            SplitByFingerprint,
   940  			DownsamplerEnabled: true,
   941  			Logger:             log.NewNopLogger(),
   942  		})
   943  		require.NoError(b, err)
   944  	}
   945  }