github.com/grafana/pyroscope@v1.18.0/pkg/compactor/bucket_compactor_e2e_test.go

// SPDX-License-Identifier: AGPL-3.0-only
// Provenance-includes-location: https://github.com/grafana/mimir/blob/main/pkg/compactor/bucket_compactor_e2e_test.go
// Provenance-includes-license: Apache-2.0
// Provenance-includes-copyright: The Cortex Authors.

package compactor

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"os"
	"path"
	"sort"
	"strings"
	"testing"
	"time"

	"github.com/go-kit/log"
	"github.com/oklog/ulid/v2"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
	promtest "github.com/prometheus/client_golang/prometheus/testutil"
	"github.com/prometheus/common/model"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	"github.com/thanos-io/objstore"

	phlareobj "github.com/grafana/pyroscope/pkg/objstore"
	"github.com/grafana/pyroscope/pkg/objstore/providers/filesystem"
	"github.com/grafana/pyroscope/pkg/phlaredb"
	"github.com/grafana/pyroscope/pkg/phlaredb/block"
)

func TestSyncer_GarbageCollect_e2e(t *testing.T) {
	foreachStore(t, func(t *testing.T, bkt phlareobj.Bucket) {
		// Use bucket with global markers to make sure that our custom filters work correctly.
		bkt = block.BucketWithGlobalMarkers(bkt)

		ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
		defer cancel()

		// Generate 10 source block metas and construct higher level blocks
		// that are higher compactions of them.
		var metas []*block.Meta
		var ids []ulid.ULID

		for i := 0; i < 10; i++ {
			var m block.Meta

			m.Version = 1
			m.ULID = ulid.MustNew(uint64(i), nil)
			m.Compaction.Sources = []ulid.ULID{m.ULID}
			m.Compaction.Level = 1
			m.MinTime = 0
			m.MaxTime = model.Time(2 * time.Hour.Milliseconds())

			ids = append(ids, m.ULID)
			metas = append(metas, &m)
		}

		var m1 block.Meta
		m1.Version = 1
		m1.ULID = ulid.MustNew(100, nil)
		m1.Compaction.Level = 2
		m1.Compaction.Sources = ids[:4]
		m1.Resolution = 0

		var m2 block.Meta
		m2.Version = 1
		m2.ULID = ulid.MustNew(200, nil)
		m2.Compaction.Level = 2
		m2.Compaction.Sources = ids[4:8] // The last two source IDs are not part of any level 2 block.
		m2.Resolution = 0

		var m3 block.Meta
		m3.Version = 1
		m3.ULID = ulid.MustNew(300, nil)
		m3.Compaction.Level = 3
		m3.Compaction.Sources = ids[:9] // The last source ID is not part of the level 3 block.
		m3.Resolution = 0
		m3.MinTime = 0
		m3.MaxTime = model.Time(2 * time.Hour.Milliseconds())

		var m4 block.Meta
		m4.Version = 1
		m4.ULID = ulid.MustNew(400, nil)
		m4.Compaction.Level = 2
		m4.Compaction.Sources = ids[9:] // Covers the last block but has a different resolution. Must not trigger deletion.
		m4.Resolution = 1000
		m4.MinTime = 0
		m4.MaxTime = model.Time(2 * time.Hour.Milliseconds())

		var m5 block.Meta
		m5.Version = 1
		m5.ULID = ulid.MustNew(500, nil)
		m5.Compaction.Level = 2
		m5.Compaction.Sources = ids[8:9] // Built from block 8, but at a different resolution. Block 8 is already included in m3, so it can be deleted.
		m5.Resolution = 1000
		m5.MinTime = 0
		m5.MaxTime = model.Time(2 * time.Hour.Milliseconds())
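
		// In summary: m1 and m2 cover sources 0-7 at level 2, m3 covers sources 0-8 at
		// level 3, and m4/m5 re-compact sources 9 and 8 at resolution 1000. Garbage
		// collection is therefore expected to keep only metas[9], m3, m4 and m5, which
		// is what the assertions below check.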

		// Create all blocks in the bucket.
		for _, m := range append(metas, &m1, &m2, &m3, &m4, &m5) {
			fmt.Println("create", m.ULID)
			var buf bytes.Buffer
			require.NoError(t, json.NewEncoder(&buf).Encode(&m))
			require.NoError(t, bkt.Upload(ctx, path.Join(m.ULID.String(), block.MetaFilename), &buf))
		}

		duplicateBlocksFilter := NewShardAwareDeduplicateFilter()
		metaFetcher, err := block.NewMetaFetcher(nil, 32, bkt, "", nil, []block.MetadataFilter{
			duplicateBlocksFilter,
		})
		require.NoError(t, err)

		blocksMarkedForDeletion := promauto.With(nil).NewCounter(prometheus.CounterOpts{})
		sy, err := NewMetaSyncer(nil, nil, bkt, metaFetcher, duplicateBlocksFilter, blocksMarkedForDeletion)
		require.NoError(t, err)

		// Do one initial synchronization with the bucket.
		require.NoError(t, sy.SyncMetas(ctx))
		require.NoError(t, sy.GarbageCollect(ctx))

		var rem []ulid.ULID
		err = bkt.Iter(ctx, "", func(n string) error {
			id, ok := block.IsBlockDir(n)
			if !ok {
				return nil
			}
			deletionMarkFile := path.Join(id.String(), block.DeletionMarkFilename)

			exists, err := bkt.Exists(ctx, deletionMarkFile)
			if err != nil {
				return err
			}
			if !exists {
				rem = append(rem, id)
			}
			return nil
		})
		require.NoError(t, err)

		sort.Slice(rem, func(i, j int) bool {
			return rem[i].Compare(rem[j]) < 0
		})

		// Only the level 3 block and the last source block (in both resolutions) should be left.
		assert.Equal(t, []ulid.ULID{metas[9].ULID, m3.ULID, m4.ULID, m5.ULID}, rem)

		// After another sync the changes should also be reflected in the local groups.
		require.NoError(t, sy.SyncMetas(ctx))
		require.NoError(t, sy.GarbageCollect(ctx))

		// Only the level 3 block and the last source block (in both resolutions) should be left.
		grouper := NewSplitAndMergeGrouper("user-1", []int64{2 * time.Hour.Milliseconds()}, 0, 0, 0, log.NewNopLogger())
		groups, err := grouper.Groups(sy.Metas())
		require.NoError(t, err)

		assert.Equal(t, "0@17241709254077376921-merge--0-7200000", groups[0].Key())
		assert.Equal(t, []ulid.ULID{metas[9].ULID, m3.ULID}, groups[0].IDs())

		assert.Equal(t, "1000@17241709254077376921-merge--0-7200000", groups[1].Key())
		assert.Equal(t, []ulid.ULID{m4.ULID, m5.ULID}, groups[1].IDs())
	})
}

func TestGroupCompactE2E(t *testing.T) {
	foreachStore(t, func(t *testing.T, bkt phlareobj.Bucket) {
		userbkt := phlareobj.NewTenantBucketClient("user-1", bkt, nil).(phlareobj.Bucket)
		// Use bucket with global markers to make sure that our custom filters work correctly.
		userbkt = block.BucketWithGlobalMarkers(userbkt)

		ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
		defer cancel()

		// Create fresh, empty directory for actual test.
		dir := t.TempDir()

		// Start dir checker... we make sure that "dir" only contains group subdirectories during compaction,
		// and not any block directories. Dir checker stops when context is canceled, or on first error,
		// in which case the error is logged and the test is failed. (We cannot use Fatal or FailNow from a goroutine.)
		go func() {
			for ctx.Err() == nil {
				fs, err := os.ReadDir(dir)
				if err != nil && !os.IsNotExist(err) {
					t.Log("error while listing directory", dir)
					t.Fail()
					return
				}

				for _, fi := range fs {
					// Suffix used by Prometheus LeveledCompactor when doing compaction.
					toCheck := strings.TrimSuffix(fi.Name(), ".tmp-for-creation")

					_, err := ulid.Parse(toCheck)
					if err == nil {
						t.Log("found block directory in main compaction directory", fi.Name())
						t.Fail()
						return
					}
				}

				select {
				case <-time.After(100 * time.Millisecond):
					continue
				case <-ctx.Done():
					return
				}
			}
		}()
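
		// Wire up the compaction pipeline under test: a meta fetcher with the deduplicate
		// and no-compact filters, a metadata syncer on top of it, and a split-and-merge
		// planner/grouper feeding the bucket compactor, which uses dir as its working
		// directory.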

		logger := log.NewLogfmtLogger(os.Stderr)

		duplicateBlocksFilter := NewShardAwareDeduplicateFilter()
		noCompactMarkerFilter := NewNoCompactionMarkFilter(userbkt, true)
		metaFetcher, err := block.NewMetaFetcher(nil, 32, userbkt, "", nil, []block.MetadataFilter{
			duplicateBlocksFilter,
			noCompactMarkerFilter,
		})
		require.NoError(t, err)

		blocksMarkedForDeletion := promauto.With(nil).NewCounter(prometheus.CounterOpts{})
		sy, err := NewMetaSyncer(nil, nil, userbkt, metaFetcher, duplicateBlocksFilter, blocksMarkedForDeletion)
		require.NoError(t, err)

		planner := NewSplitAndMergePlanner([]int64{1000, 3000})
		grouper := NewSplitAndMergeGrouper("user-1", []int64{1000, 3000}, 0, 0, 0, logger)
		metrics := NewBucketCompactorMetrics(blocksMarkedForDeletion, prometheus.NewPedanticRegistry())
		bComp, err := NewBucketCompactor(logger, sy, grouper, planner, &BlockCompactor{
			blockOpenConcurrency: 100,
			splitBy:              phlaredb.SplitByFingerprint,
			logger:               logger,
			metrics:              newCompactorMetrics(nil),
		}, dir, userbkt, 2, ownAllJobs, sortJobsByNewestBlocksFirst, 0, 4, metrics)
		require.NoError(t, err)

		// Compaction on an empty bucket should not fail.
		require.NoError(t, bComp.Compact(ctx, 0), 0)
		assert.Equal(t, 0.0, promtest.ToFloat64(sy.metrics.blocksMarkedForDeletion))
		assert.Equal(t, 0.0, promtest.ToFloat64(sy.metrics.garbageCollectionFailures))
		assert.Equal(t, 0.0, promtest.ToFloat64(metrics.blocksMarkedForNoCompact))
		assert.Equal(t, 0.0, promtest.ToFloat64(metrics.groupCompactions))
		assert.Equal(t, 0.0, promtest.ToFloat64(metrics.groupCompactionRunsStarted))
		assert.Equal(t, 0.0, promtest.ToFloat64(metrics.groupCompactionRunsCompleted))
		assert.Equal(t, 0.0, promtest.ToFloat64(metrics.groupCompactionRunsFailed))

		_, err = os.Stat(dir)
		assert.True(t, os.IsNotExist(err), "dir %s should be removed after compaction.", dir)

		m1 := createDBBlock(t, bkt, "user-1", 500, 1000, 10, nil)
		m2 := createDBBlock(t, bkt, "user-1", 500, 1000, 10, nil)

		m3 := createDBBlock(t, bkt, "user-1", 1001, 2000, 10, nil)
		m4 := createDBBlock(t, bkt, "user-1", 1001, 3000, 10, nil)

		require.NoError(t, bComp.Compact(ctx, 0), 0)
		assert.Equal(t, 5.0, promtest.ToFloat64(sy.metrics.blocksMarkedForDeletion))
		assert.Equal(t, 0.0, promtest.ToFloat64(metrics.blocksMarkedForNoCompact))
		assert.Equal(t, 0.0, promtest.ToFloat64(sy.metrics.garbageCollectionFailures))
		assert.Equal(t, 2.0, promtest.ToFloat64(metrics.groupCompactions))
		assert.Equal(t, 2.0, promtest.ToFloat64(metrics.groupCompactionRunsStarted))
		assert.Equal(t, 2.0, promtest.ToFloat64(metrics.groupCompactionRunsCompleted))
		assert.Equal(t, 0.0, promtest.ToFloat64(metrics.groupCompactionRunsFailed))

		_, err = os.Stat(dir)
		assert.True(t, os.IsNotExist(err), "dir %s should be removed after compaction.", dir)

		metas, _, err := metaFetcher.FetchWithoutMarkedForDeletion(context.Background())
		require.NoError(t, err)
		require.Len(t, metas, 1)
		var meta block.Meta
		for _, m := range metas {
			meta = *m
		}
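
		// The single surviving block should be the level 3 result of compacting all four
		// source blocks, spanning the full 500-3000 time range.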
		require.Equal(t, []ulid.ULID{m1, m2, m3, m4}, meta.Compaction.Sources)
		require.Equal(t, 3, meta.Compaction.Level)
		require.Equal(t, model.Time(500), meta.MinTime)
		require.Equal(t, model.Time(3000), meta.MaxTime)
	})
}

func foreachStore(t *testing.T, testFn func(t *testing.T, bkt phlareobj.Bucket)) {
	t.Parallel()

	// Mandatory Inmem. Not parallel, to detect problems early.
	if ok := t.Run("inmem", func(t *testing.T) {
		testFn(t, phlareobj.NewBucket(objstore.NewInMemBucket()))
	}); !ok {
		return
	}

	// Mandatory Filesystem.
	t.Run("filesystem", func(t *testing.T) {
		t.Parallel()

		dir := t.TempDir()

		b, err := filesystem.NewBucket(dir)
		require.NoError(t, err)
		testFn(t, b)
	})
}