github.com/grafana/pyroscope@v1.18.0/pkg/compactor/bucket_compactor_e2e_test.go (about)

     1  // SPDX-License-Identifier: AGPL-3.0-only
     2  // Provenance-includes-location: https://github.com/grafana/mimir/blob/main/pkg/compactor/bucket_compactor_e2e_test.go
     3  // Provenance-includes-license: Apache-2.0
     4  // Provenance-includes-copyright: The Cortex Authors.
     5  
     6  package compactor
     7  
     8  import (
     9  	"bytes"
    10  	"context"
    11  	"encoding/json"
    12  	"fmt"
    13  	"os"
    14  	"path"
    15  	"sort"
    16  	"strings"
    17  	"testing"
    18  	"time"
    19  
    20  	"github.com/go-kit/log"
    21  	"github.com/oklog/ulid/v2"
    22  	"github.com/prometheus/client_golang/prometheus"
    23  	"github.com/prometheus/client_golang/prometheus/promauto"
    24  	promtest "github.com/prometheus/client_golang/prometheus/testutil"
    25  	"github.com/prometheus/common/model"
    26  	"github.com/stretchr/testify/assert"
    27  	"github.com/stretchr/testify/require"
    28  	"github.com/thanos-io/objstore"
    29  
    30  	phlareobj "github.com/grafana/pyroscope/pkg/objstore"
    31  	"github.com/grafana/pyroscope/pkg/objstore/providers/filesystem"
    32  	"github.com/grafana/pyroscope/pkg/phlaredb"
    33  	"github.com/grafana/pyroscope/pkg/phlaredb/block"
    34  )
    35  
    36  func TestSyncer_GarbageCollect_e2e(t *testing.T) {
    37  	foreachStore(t, func(t *testing.T, bkt phlareobj.Bucket) {
    38  		// Use bucket with global markers to make sure that our custom filters work correctly.
    39  		bkt = block.BucketWithGlobalMarkers(bkt)
    40  
    41  		ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
    42  		defer cancel()
    43  
    44  		// Generate 10 source block metas and construct higher level blocks
    45  		// that are higher compactions of them.
    46  		var metas []*block.Meta
    47  		var ids []ulid.ULID
    48  
    49  		for i := 0; i < 10; i++ {
    50  			var m block.Meta
    51  
    52  			m.Version = 1
    53  			m.ULID = ulid.MustNew(uint64(i), nil)
    54  			m.Compaction.Sources = []ulid.ULID{m.ULID}
    55  			m.Compaction.Level = 1
    56  			m.MinTime = 0
    57  			m.MaxTime = model.Time(2 * time.Hour.Milliseconds())
    58  
    59  			ids = append(ids, m.ULID)
    60  			metas = append(metas, &m)
    61  		}
    62  
    63  		var m1 block.Meta
    64  		m1.Version = 1
    65  		m1.ULID = ulid.MustNew(100, nil)
    66  		m1.Compaction.Level = 2
    67  		m1.Compaction.Sources = ids[:4]
    68  		m1.Resolution = 0
    69  
    70  		var m2 block.Meta
    71  		m2.Version = 1
    72  		m2.ULID = ulid.MustNew(200, nil)
    73  		m2.Compaction.Level = 2
    74  		m2.Compaction.Sources = ids[4:8] // last two source IDs is not part of a level 2 block.
    75  		m2.Resolution = 0
    76  
    77  		var m3 block.Meta
    78  		m3.Version = 1
    79  		m3.ULID = ulid.MustNew(300, nil)
    80  		m3.Compaction.Level = 3
    81  		m3.Compaction.Sources = ids[:9] // last source ID is not part of level 3 block.
    82  		m3.Resolution = 0
    83  		m3.MinTime = 0
    84  		m3.MaxTime = model.Time(2 * time.Hour.Milliseconds())
    85  
    86  		var m4 block.Meta
    87  		m4.Version = 1
    88  		m4.ULID = ulid.MustNew(400, nil)
    89  		m4.Compaction.Level = 2
    90  		m4.Compaction.Sources = ids[9:] // covers the last block but is a different resolution. Must not trigger deletion.
    91  		m4.Resolution = 1000
    92  		m4.MinTime = 0
    93  		m4.MaxTime = model.Time(2 * time.Hour.Milliseconds())
    94  
    95  		var m5 block.Meta
    96  		m5.Version = 1
    97  		m5.ULID = ulid.MustNew(500, nil)
    98  		m5.Compaction.Level = 2
    99  		m5.Compaction.Sources = ids[8:9] // built from block 8, but different resolution. Block 8 is already included in m3, can be deleted.
   100  		m5.Resolution = 1000
   101  		m5.MinTime = 0
   102  		m5.MaxTime = model.Time(2 * time.Hour.Milliseconds())
   103  
   104  		// Create all blocks in the bucket.
   105  		for _, m := range append(metas, &m1, &m2, &m3, &m4, &m5) {
   106  			fmt.Println("create", m.ULID)
   107  			var buf bytes.Buffer
   108  			require.NoError(t, json.NewEncoder(&buf).Encode(&m))
   109  			require.NoError(t, bkt.Upload(ctx, path.Join(m.ULID.String(), block.MetaFilename), &buf))
   110  		}
   111  
   112  		duplicateBlocksFilter := NewShardAwareDeduplicateFilter()
   113  		metaFetcher, err := block.NewMetaFetcher(nil, 32, bkt, "", nil, []block.MetadataFilter{
   114  			duplicateBlocksFilter,
   115  		})
   116  		require.NoError(t, err)
   117  
   118  		blocksMarkedForDeletion := promauto.With(nil).NewCounter(prometheus.CounterOpts{})
   119  		sy, err := NewMetaSyncer(nil, nil, bkt, metaFetcher, duplicateBlocksFilter, blocksMarkedForDeletion)
   120  		require.NoError(t, err)
   121  
   122  		// Do one initial synchronization with the bucket.
   123  		require.NoError(t, sy.SyncMetas(ctx))
   124  		require.NoError(t, sy.GarbageCollect(ctx))
   125  
   126  		var rem []ulid.ULID
   127  		err = bkt.Iter(ctx, "", func(n string) error {
   128  			id, ok := block.IsBlockDir(n)
   129  			if !ok {
   130  				return nil
   131  			}
   132  			deletionMarkFile := path.Join(id.String(), block.DeletionMarkFilename)
   133  
   134  			exists, err := bkt.Exists(ctx, deletionMarkFile)
   135  			if err != nil {
   136  				return err
   137  			}
   138  			if !exists {
   139  				rem = append(rem, id)
   140  			}
   141  			return nil
   142  		})
   143  		require.NoError(t, err)
   144  
   145  		sort.Slice(rem, func(i, j int) bool {
   146  			return rem[i].Compare(rem[j]) < 0
   147  		})
   148  
   149  		// Only the level 3 block, the last source block in both resolutions should be left.
   150  		assert.Equal(t, []ulid.ULID{metas[9].ULID, m3.ULID, m4.ULID, m5.ULID}, rem)
   151  
   152  		// After another sync the changes should also be reflected in the local groups.
   153  		require.NoError(t, sy.SyncMetas(ctx))
   154  		require.NoError(t, sy.GarbageCollect(ctx))
   155  
   156  		// Only the level 3 block, the last source block in both resolutions should be left.
   157  		grouper := NewSplitAndMergeGrouper("user-1", []int64{2 * time.Hour.Milliseconds()}, 0, 0, 0, log.NewNopLogger())
   158  		groups, err := grouper.Groups(sy.Metas())
   159  		require.NoError(t, err)
   160  
   161  		assert.Equal(t, "0@17241709254077376921-merge--0-7200000", groups[0].Key())
   162  		assert.Equal(t, []ulid.ULID{metas[9].ULID, m3.ULID}, groups[0].IDs())
   163  
   164  		assert.Equal(t, "1000@17241709254077376921-merge--0-7200000", groups[1].Key())
   165  		assert.Equal(t, []ulid.ULID{m4.ULID, m5.ULID}, groups[1].IDs())
   166  	})
   167  }
   168  
// TestGroupCompactE2E runs the full bucket compactor against a tenant bucket:
// it first verifies that compacting an empty bucket is a no-op, then creates
// four overlapping source blocks and checks that one compaction pass merges
// them into a single level-3 block with the expected sources and time range.
// A background goroutine asserts that no block directories (only group
// subdirectories) ever appear in the compactor's working directory.
func TestGroupCompactE2E(t *testing.T) {
	foreachStore(t, func(t *testing.T, bkt phlareobj.Bucket) {
		userbkt := phlareobj.NewTenantBucketClient("user-1", bkt, nil).(phlareobj.Bucket)
		// Use bucket with global markers to make sure that our custom filters work correctly.
		userbkt = block.BucketWithGlobalMarkers(userbkt)

		ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
		defer cancel()

		// Create fresh, empty directory for actual test.
		dir := t.TempDir()

		// Start dir checker... we make sure that "dir" only contains group subdirectories during compaction,
		// and not any block directories. Dir checker stops when context is canceled, or on first error,
		// in which case the error is logged and the test is failed. (We cannot use Fatal or FailNow from a goroutine).
		go func() {
			for ctx.Err() == nil {
				fs, err := os.ReadDir(dir)
				if err != nil && !os.IsNotExist(err) {
					t.Log("error while listing directory", dir)
					t.Fail()
					return
				}

				for _, fi := range fs {
					// Suffix used by Prometheus LeveledCompactor when doing compaction.
					toCheck := strings.TrimSuffix(fi.Name(), ".tmp-for-creation")

					// A name that parses as a ULID is a block directory,
					// which must never live directly under "dir".
					_, err := ulid.Parse(toCheck)
					if err == nil {
						t.Log("found block directory in main compaction directory", fi.Name())
						t.Fail()
						return
					}
				}

				// Poll every 100ms until the test context is canceled.
				select {
				case <-time.After(100 * time.Millisecond):
					continue
				case <-ctx.Done():
					return
				}
			}
		}()

		logger := log.NewLogfmtLogger(os.Stderr)

		duplicateBlocksFilter := NewShardAwareDeduplicateFilter()
		noCompactMarkerFilter := NewNoCompactionMarkFilter(userbkt, true)
		metaFetcher, err := block.NewMetaFetcher(nil, 32, userbkt, "", nil, []block.MetadataFilter{
			duplicateBlocksFilter,
			noCompactMarkerFilter,
		})
		require.NoError(t, err)

		blocksMarkedForDeletion := promauto.With(nil).NewCounter(prometheus.CounterOpts{})
		sy, err := NewMetaSyncer(nil, nil, userbkt, metaFetcher, duplicateBlocksFilter, blocksMarkedForDeletion)
		require.NoError(t, err)

		// Two compaction ranges (1s and 3s in milliseconds) so the four blocks
		// below fall into two groups on the first pass.
		planner := NewSplitAndMergePlanner([]int64{1000, 3000})
		grouper := NewSplitAndMergeGrouper("user-1", []int64{1000, 3000}, 0, 0, 0, logger)
		metrics := NewBucketCompactorMetrics(blocksMarkedForDeletion, prometheus.NewPedanticRegistry())
		bComp, err := NewBucketCompactor(logger, sy, grouper, planner, &BlockCompactor{
			blockOpenConcurrency: 100,
			splitBy:              phlaredb.SplitByFingerprint,
			logger:               logger,
			metrics:              newCompactorMetrics(nil),
		}, dir, userbkt, 2, ownAllJobs, sortJobsByNewestBlocksFirst, 0, 4, metrics)
		require.NoError(t, err)

		// Compaction on empty should not fail.
		require.NoError(t, bComp.Compact(ctx, 0), 0)
		assert.Equal(t, 0.0, promtest.ToFloat64(sy.metrics.blocksMarkedForDeletion))
		assert.Equal(t, 0.0, promtest.ToFloat64(sy.metrics.garbageCollectionFailures))
		assert.Equal(t, 0.0, promtest.ToFloat64(metrics.blocksMarkedForNoCompact))
		assert.Equal(t, 0.0, promtest.ToFloat64(metrics.groupCompactions))
		assert.Equal(t, 0.0, promtest.ToFloat64(metrics.groupCompactionRunsStarted))
		assert.Equal(t, 0.0, promtest.ToFloat64(metrics.groupCompactionRunsCompleted))
		assert.Equal(t, 0.0, promtest.ToFloat64(metrics.groupCompactionRunsFailed))

		_, err = os.Stat(dir)
		assert.True(t, os.IsNotExist(err), "dir %s should be remove after compaction.", dir)

		// Two blocks in the first range [500, 1000) and two in the second
		// [1001, 3000); each with 10 series.
		m1 := createDBBlock(t, bkt, "user-1", 500, 1000, 10, nil)
		m2 := createDBBlock(t, bkt, "user-1", 500, 1000, 10, nil)

		m3 := createDBBlock(t, bkt, "user-1", 1001, 2000, 10, nil)
		m4 := createDBBlock(t, bkt, "user-1", 1001, 3000, 10, nil)

		// One pass compacts both groups: 2 group compactions, all 4 source
		// blocks plus 1 intermediate block marked for deletion.
		require.NoError(t, bComp.Compact(ctx, 0), 0)
		assert.Equal(t, 5.0, promtest.ToFloat64(sy.metrics.blocksMarkedForDeletion))
		assert.Equal(t, 0.0, promtest.ToFloat64(metrics.blocksMarkedForNoCompact))
		assert.Equal(t, 0.0, promtest.ToFloat64(sy.metrics.garbageCollectionFailures))
		assert.Equal(t, 2.0, promtest.ToFloat64(metrics.groupCompactions))
		assert.Equal(t, 2.0, promtest.ToFloat64(metrics.groupCompactionRunsStarted))
		assert.Equal(t, 2.0, promtest.ToFloat64(metrics.groupCompactionRunsCompleted))
		assert.Equal(t, 0.0, promtest.ToFloat64(metrics.groupCompactionRunsFailed))

		_, err = os.Stat(dir)
		assert.True(t, os.IsNotExist(err), "dir %s should be remove after compaction.", dir)

		// Exactly one live block must remain: the merged result covering all
		// four sources and the union of their time ranges.
		metas, _, err := metaFetcher.FetchWithoutMarkedForDeletion(context.Background())
		require.NoError(t, err)
		require.Len(t, metas, 1)
		var meta block.Meta
		for _, m := range metas {
			meta = *m
		}
		require.Equal(t, []ulid.ULID{m1, m2, m3, m4}, meta.Compaction.Sources)
		require.Equal(t, 3, meta.Compaction.Level)
		require.Equal(t, model.Time(500), meta.MinTime)
		require.Equal(t, model.Time(3000), meta.MaxTime)
	})
}
   283  
   284  func foreachStore(t *testing.T, testFn func(t *testing.T, bkt phlareobj.Bucket)) {
   285  	t.Parallel()
   286  
   287  	// Mandatory Inmem. Not parallel, to detect problem early.
   288  	if ok := t.Run("inmem", func(t *testing.T) {
   289  		testFn(t, phlareobj.NewBucket(objstore.NewInMemBucket()))
   290  	}); !ok {
   291  		return
   292  	}
   293  
   294  	// Mandatory Filesystem.
   295  	t.Run("filesystem", func(t *testing.T) {
   296  		t.Parallel()
   297  
   298  		dir := t.TempDir()
   299  
   300  		b, err := filesystem.NewBucket(dir)
   301  		require.NoError(t, err)
   302  		testFn(t, b)
   303  	})
   304  }