github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/block/fetcher.go (about) 1 // SPDX-License-Identifier: AGPL-3.0-only 2 // Provenance-includes-location: https://github.com/grafana/mimir/blob/main/pkg/storage/tsdb/block/fetcher.go 3 // Provenance-includes-license: Apache-2.0 4 // Provenance-includes-copyright: The Thanos Authors. 5 6 package block 7 8 import ( 9 "context" 10 "encoding/json" 11 "io" 12 "os" 13 "path" 14 "path/filepath" 15 "sync" 16 "time" 17 18 "github.com/go-kit/log" 19 "github.com/go-kit/log/level" 20 "github.com/golang/groupcache/singleflight" 21 "github.com/grafana/dskit/multierror" 22 "github.com/grafana/dskit/runutil" 23 "github.com/oklog/ulid/v2" 24 "github.com/pkg/errors" 25 "github.com/prometheus/client_golang/prometheus" 26 "github.com/prometheus/client_golang/prometheus/promauto" 27 "golang.org/x/sync/errgroup" 28 29 "github.com/grafana/pyroscope/pkg/objstore" 30 "github.com/grafana/pyroscope/pkg/util/extprom" 31 ) 32 33 // FetcherMetrics holds metrics tracked by the metadata fetcher. This struct and its fields are exported 34 // to allow depending projects (eg. Cortex) to implement their own custom metadata fetcher while tracking 35 // compatible metrics. 36 type FetcherMetrics struct { 37 Syncs prometheus.Counter 38 SyncFailures prometheus.Counter 39 SyncDuration prometheus.Histogram 40 41 Synced *extprom.TxGaugeVec 42 } 43 44 // Submit applies new values for metrics tracked by transaction GaugeVec. 45 func (s *FetcherMetrics) Submit() { 46 s.Synced.Submit() 47 } 48 49 // ResetTx starts new transaction for metrics tracked by transaction GaugeVec. 50 func (s *FetcherMetrics) ResetTx() { 51 s.Synced.ResetTx() 52 } 53 54 const ( 55 fetcherSubSys = "blocks_meta" 56 57 CorruptedMeta = "corrupted-meta-json" 58 NoMeta = "no-meta-json" 59 LoadedMeta = "loaded" 60 FailedMeta = "failed" 61 62 // Synced label values. 63 labelExcludedMeta = "label-excluded" 64 timeExcludedMeta = "time-excluded" 65 duplicateMeta = "duplicate" 66 // Blocks that are marked for deletion can be loaded as well. This is done to make sure that we load blocks that are meant to be deleted, 67 // but don't have a replacement block yet. 68 MarkedForDeletionMeta = "marked-for-deletion" 69 70 // MarkedForNoCompactionMeta is label for blocks which are loaded but also marked for no compaction. This label is also counted in `loaded` label metric. 71 MarkedForNoCompactionMeta = "marked-for-no-compact" 72 ) 73 74 func NewFetcherMetrics(reg prometheus.Registerer, syncedExtraLabels [][]string) *FetcherMetrics { 75 var m FetcherMetrics 76 77 m.Syncs = promauto.With(reg).NewCounter(prometheus.CounterOpts{ 78 Subsystem: fetcherSubSys, 79 Name: "syncs_total", 80 Help: "Total blocks metadata synchronization attempts", 81 }) 82 m.SyncFailures = promauto.With(reg).NewCounter(prometheus.CounterOpts{ 83 Subsystem: fetcherSubSys, 84 Name: "sync_failures_total", 85 Help: "Total blocks metadata synchronization failures", 86 }) 87 m.SyncDuration = promauto.With(reg).NewHistogram(prometheus.HistogramOpts{ 88 Subsystem: fetcherSubSys, 89 Name: "sync_duration_seconds", 90 Help: "Duration of the blocks metadata synchronization in seconds", 91 Buckets: []float64{0.01, 1, 10, 100, 300, 600, 1000}, 92 }) 93 m.Synced = extprom.NewTxGaugeVec( 94 reg, 95 prometheus.GaugeOpts{ 96 Subsystem: fetcherSubSys, 97 Name: "synced", 98 Help: "Number of block metadata synced", 99 }, 100 []string{"state"}, 101 append([][]string{ 102 {CorruptedMeta}, 103 {NoMeta}, 104 {LoadedMeta}, 105 {FailedMeta}, 106 {labelExcludedMeta}, 107 {timeExcludedMeta}, 108 {duplicateMeta}, 109 {MarkedForDeletionMeta}, 110 {MarkedForNoCompactionMeta}, 111 }, syncedExtraLabels...)..., 112 ) 113 return &m 114 } 115 116 type MetadataFetcher interface { 117 Fetch(ctx context.Context) (metas map[ulid.ULID]*Meta, partial map[ulid.ULID]error, err error) 118 } 119 120 // GaugeVec hides something like a Prometheus GaugeVec or an extprom.TxGaugeVec. 121 type GaugeVec interface { 122 WithLabelValues(lvs ...string) prometheus.Gauge 123 } 124 125 // MetadataFilter allows filtering or modifying metas from the provided map or returns error. 126 type MetadataFilter interface { 127 Filter(ctx context.Context, metas map[ulid.ULID]*Meta, synced GaugeVec) error 128 } 129 130 // MetaFetcher is a struct that synchronizes filtered metadata of all block in the object storage with the local state. 131 // Go-routine safe. 132 type MetaFetcher struct { 133 logger log.Logger 134 concurrency int 135 bkt objstore.BucketReader 136 metrics *FetcherMetrics 137 filters []MetadataFilter 138 139 // Optional local directory to cache meta.json files. 140 cacheDir string 141 g singleflight.Group 142 143 mtx sync.Mutex 144 cached map[ulid.ULID]*Meta 145 } 146 147 // NewMetaFetcher returns a MetaFetcher. 148 func NewMetaFetcher(logger log.Logger, concurrency int, bkt objstore.BucketReader, dir string, reg prometheus.Registerer, filters []MetadataFilter) (*MetaFetcher, error) { 149 return NewMetaFetcherWithMetrics(logger, concurrency, bkt, dir, NewFetcherMetrics(reg, nil), filters) 150 } 151 152 func NewMetaFetcherWithMetrics(logger log.Logger, concurrency int, bkt objstore.BucketReader, dir string, metrics *FetcherMetrics, filters []MetadataFilter) (*MetaFetcher, error) { 153 if logger == nil { 154 logger = log.NewNopLogger() 155 } 156 157 cacheDir := "" 158 if dir != "" { 159 cacheDir = filepath.Join(dir, "meta-syncer") 160 if err := os.MkdirAll(cacheDir, os.ModePerm); err != nil { 161 return nil, err 162 } 163 } 164 165 return &MetaFetcher{ 166 logger: log.With(logger, "component", "block.MetaFetcher"), 167 concurrency: concurrency, 168 bkt: bkt, 169 cacheDir: cacheDir, 170 cached: map[ulid.ULID]*Meta{}, 171 metrics: metrics, 172 filters: filters, 173 }, nil 174 } 175 176 var ( 177 ErrorSyncMetaNotFound = errors.New("meta.json not found") 178 ErrorSyncMetaCorrupted = errors.New("meta.json corrupted") 179 ) 180 181 // LoadMeta returns metadata from object storage or error. 182 // It returns ErrorSyncMetaNotFound and ErrorSyncMetaCorrupted sentinel errors in those cases. 183 func (f *MetaFetcher) LoadMeta(ctx context.Context, id ulid.ULID) (*Meta, error) { 184 var ( 185 metaFile = path.Join(id.String(), MetaFilename) 186 cachedBlockDir = filepath.Join(f.cacheDir, id.String()) 187 ) 188 189 // Block meta.json file is immutable, so we lookup the cache as first thing without issuing 190 // any API call to the object storage. This significantly reduce the pressure on the object 191 // storage. 192 // 193 // Details of all possible cases: 194 // 195 // - The block upload is in progress: the meta.json file is guaranteed to be uploaded at last. 196 // When we'll try to read it from object storage (later on), it will fail with ErrorSyncMetaNotFound 197 // which is correctly handled by the caller (partial block). 198 // 199 // - The block upload is completed: this is the normal case. meta.json file still exists in the 200 // object storage and it's expected to match the locally cached one (because it's immutable by design). 201 // - The block has been marked for deletion: the deletion hasn't started yet, so the full block (including 202 // the meta.json file) is still in the object storage. This case is not different than the previous one. 203 // 204 // - The block deletion is in progress: loadMeta() function may return the cached meta.json while it should 205 // return ErrorSyncMetaNotFound. This is a race condition that could happen even if we check the meta.json 206 // file in the storage, because the deletion could start right after we check it but before the MetaFetcher 207 // completes its sync. 208 // 209 // - The block has been deleted: the loadMeta() function will not be called at all, because the block 210 // was not discovered while iterating the bucket since all its files were already deleted. 211 if m, seen := f.cached[id]; seen { 212 return m, nil 213 } 214 215 // Best effort load from local dir. 216 if f.cacheDir != "" { 217 m, err := ReadMetaFromDir(cachedBlockDir) 218 if err == nil { 219 return m, nil 220 } 221 222 if !errors.Is(err, os.ErrNotExist) { 223 level.Warn(f.logger).Log("msg", "best effort read of the local meta.json failed; removing cached block dir", "dir", cachedBlockDir, "err", err) 224 if err := os.RemoveAll(cachedBlockDir); err != nil { 225 level.Warn(f.logger).Log("msg", "best effort remove of cached dir failed; ignoring", "dir", cachedBlockDir, "err", err) 226 } 227 } 228 } 229 230 // todo(cyriltovena): we should use ReaderWithExpectedErrs(f.bkt.IsObjNotFoundErr) here, to avoid counting IsObjNotFoundErr as an error 231 // since this is expected 232 r, err := f.bkt.Get(ctx, metaFile) 233 if f.bkt.IsObjNotFoundErr(err) { 234 // Meta.json was deleted between bkt.Exists and here. 235 return nil, errors.Wrapf(ErrorSyncMetaNotFound, "%v", err) 236 } 237 if err != nil { 238 return nil, errors.Wrapf(err, "get meta file: %v", metaFile) 239 } 240 241 defer runutil.CloseWithLogOnErr(f.logger, r, "close bkt meta get") 242 243 metaContent, err := io.ReadAll(r) 244 if err != nil { 245 return nil, errors.Wrapf(err, "read meta file: %v", metaFile) 246 } 247 248 m := &Meta{} 249 if err := json.Unmarshal(metaContent, m); err != nil { 250 return nil, errors.Wrapf(ErrorSyncMetaCorrupted, "meta.json %v unmarshal: %v", metaFile, err) 251 } 252 253 if !m.Version.IsValid() { 254 return nil, errors.Errorf("unexpected meta file: %s version: %d", metaFile, m.Version) 255 } 256 257 // Best effort cache in local dir. 258 if f.cacheDir != "" { 259 if err := os.MkdirAll(cachedBlockDir, os.ModePerm); err != nil { 260 level.Warn(f.logger).Log("msg", "best effort mkdir of the meta.json block dir failed; ignoring", "dir", cachedBlockDir, "err", err) 261 } 262 263 if _, err := m.WriteToFile(f.logger, cachedBlockDir); err != nil { 264 level.Warn(f.logger).Log("msg", "best effort save of the meta.json to local dir failed; ignoring", "dir", cachedBlockDir, "err", err) 265 } 266 } 267 return m, nil 268 } 269 270 type response struct { 271 metas map[ulid.ULID]*Meta 272 partial map[ulid.ULID]error 273 274 // If metaErr > 0 it means incomplete view, so some metas, failed to be loaded. 275 metaErrs multierror.MultiError 276 277 // Track the number of blocks not returned because of various reasons. 278 noMetasCount float64 279 corruptedMetasCount float64 280 markedForDeletionCount float64 281 } 282 283 func (f *MetaFetcher) fetchMetadata(ctx context.Context, excludeMarkedForDeletion bool) (interface{}, error) { 284 var ( 285 resp = response{ 286 metas: make(map[ulid.ULID]*Meta), 287 partial: make(map[ulid.ULID]error), 288 } 289 eg errgroup.Group 290 ch = make(chan ulid.ULID, f.concurrency) 291 mtx sync.Mutex 292 ) 293 level.Debug(f.logger).Log("msg", "fetching meta data", "concurrency", f.concurrency) 294 295 // Get the list of blocks marked for deletion so that we'll exclude them (if required). 296 var markedForDeletion map[ulid.ULID]struct{} 297 if excludeMarkedForDeletion { 298 var err error 299 300 markedForDeletion, err = ListBlockDeletionMarks(ctx, f.bkt) 301 if err != nil { 302 return nil, err 303 } 304 } 305 306 // Run workers. 307 for i := 0; i < f.concurrency; i++ { 308 eg.Go(func() error { 309 for id := range ch { 310 meta, err := f.LoadMeta(ctx, id) 311 if err == nil { 312 mtx.Lock() 313 resp.metas[id] = meta 314 mtx.Unlock() 315 continue 316 } 317 318 if errors.Is(errors.Cause(err), ErrorSyncMetaNotFound) { 319 mtx.Lock() 320 resp.noMetasCount++ 321 mtx.Unlock() 322 } else if errors.Is(errors.Cause(err), ErrorSyncMetaCorrupted) { 323 mtx.Lock() 324 resp.corruptedMetasCount++ 325 mtx.Unlock() 326 } else { 327 mtx.Lock() 328 resp.metaErrs.Add(err) 329 mtx.Unlock() 330 continue 331 } 332 333 mtx.Lock() 334 resp.partial[id] = err 335 mtx.Unlock() 336 } 337 return nil 338 }) 339 } 340 341 // Workers scheduled, distribute blocks. 342 eg.Go(func() error { 343 defer close(ch) 344 return f.bkt.Iter(ctx, "", func(name string) error { 345 id, ok := IsBlockDir(name) 346 if !ok { 347 return nil 348 } 349 350 // If requested, skip any block marked for deletion. 351 if _, marked := markedForDeletion[id]; excludeMarkedForDeletion && marked { 352 resp.markedForDeletionCount++ 353 return nil 354 } 355 356 select { 357 case <-ctx.Done(): 358 return ctx.Err() 359 case ch <- id: 360 } 361 362 return nil 363 }) 364 }) 365 366 if err := eg.Wait(); err != nil { 367 return nil, errors.Wrap(err, "MetaFetcher: iter bucket") 368 } 369 370 if len(resp.metaErrs) > 0 { 371 return resp, nil 372 } 373 374 // Only for complete view of blocks update the cache. 375 cached := make(map[ulid.ULID]*Meta, len(resp.metas)) 376 for id, m := range resp.metas { 377 cached[id] = m 378 } 379 380 f.mtx.Lock() 381 f.cached = cached 382 f.mtx.Unlock() 383 384 // Best effort cleanup of disk-cached metas. 385 if f.cacheDir != "" { 386 fis, err := os.ReadDir(f.cacheDir) 387 names := make([]string, 0, len(fis)) 388 for _, fi := range fis { 389 names = append(names, fi.Name()) 390 } 391 if err != nil { 392 level.Warn(f.logger).Log("msg", "best effort remove of not needed cached dirs failed; ignoring", "err", err) 393 } else { 394 for _, n := range names { 395 id, ok := IsBlockDir(n) 396 if !ok { 397 continue 398 } 399 400 if _, ok := resp.metas[id]; ok { 401 continue 402 } 403 404 cachedBlockDir := filepath.Join(f.cacheDir, id.String()) 405 406 // No such block loaded, remove the local dir. 407 if err := os.RemoveAll(cachedBlockDir); err != nil { 408 level.Warn(f.logger).Log("msg", "best effort remove of not needed cached dir failed; ignoring", "dir", cachedBlockDir, "err", err) 409 } 410 } 411 } 412 } 413 return resp, nil 414 } 415 416 // Fetch returns all block metas as well as partial blocks (blocks without or with corrupted meta file) from the bucket. 417 // It's caller responsibility to not change the returned metadata files. Maps can be modified. 418 // 419 // Returned error indicates a failure in fetching metadata. Returned meta can be assumed as correct, with some blocks missing. 420 func (f *MetaFetcher) Fetch(ctx context.Context) (metas map[ulid.ULID]*Meta, partials map[ulid.ULID]error, err error) { 421 metas, partials, err = f.fetch(ctx, false) 422 return 423 } 424 425 // FetchWithoutMarkedForDeletion returns all block metas as well as partial blocks (blocks without or with corrupted meta file) from the bucket. 426 // This function excludes all blocks for deletion (no deletion delay applied). 427 // It's caller responsibility to not change the returned metadata files. Maps can be modified. 428 // 429 // Returned error indicates a failure in fetching metadata. Returned meta can be assumed as correct, with some blocks missing. 430 func (f *MetaFetcher) FetchWithoutMarkedForDeletion(ctx context.Context) (metas map[ulid.ULID]*Meta, partials map[ulid.ULID]error, err error) { 431 metas, partials, err = f.fetch(ctx, true) 432 return 433 } 434 435 func (f *MetaFetcher) fetch(ctx context.Context, excludeMarkedForDeletion bool) (_ map[ulid.ULID]*Meta, _ map[ulid.ULID]error, err error) { 436 start := time.Now() 437 defer func() { 438 f.metrics.SyncDuration.Observe(time.Since(start).Seconds()) 439 if err != nil { 440 f.metrics.SyncFailures.Inc() 441 } 442 }() 443 f.metrics.Syncs.Inc() 444 f.metrics.ResetTx() 445 446 // Run this in thread safe run group. 447 v, err := f.g.Do("", func() (i interface{}, err error) { 448 // NOTE: First go routine context will go through. 449 return f.fetchMetadata(ctx, excludeMarkedForDeletion) 450 }) 451 if err != nil { 452 return nil, nil, err 453 } 454 resp := v.(response) 455 456 // Copy as same response might be reused by different goroutines. 457 metas := make(map[ulid.ULID]*Meta, len(resp.metas)) 458 for id, m := range resp.metas { 459 metas[id] = m 460 } 461 462 f.metrics.Synced.WithLabelValues(FailedMeta).Set(float64(len(resp.metaErrs))) 463 f.metrics.Synced.WithLabelValues(NoMeta).Set(resp.noMetasCount) 464 f.metrics.Synced.WithLabelValues(CorruptedMeta).Set(resp.corruptedMetasCount) 465 if excludeMarkedForDeletion { 466 f.metrics.Synced.WithLabelValues(MarkedForDeletionMeta).Set(resp.markedForDeletionCount) 467 } 468 469 for _, filter := range f.filters { 470 // NOTE: filter can update synced metric accordingly to the reason of the exclude. 471 if err := filter.Filter(ctx, metas, f.metrics.Synced); err != nil { 472 return nil, nil, errors.Wrap(err, "filter metas") 473 } 474 } 475 476 f.metrics.Synced.WithLabelValues(LoadedMeta).Set(float64(len(metas))) 477 f.metrics.Submit() 478 479 if len(resp.metaErrs) > 0 { 480 return metas, resp.partial, errors.Wrap(resp.metaErrs.Err(), "incomplete view") 481 } 482 483 level.Info(f.logger).Log("msg", "successfully synchronized block metadata", "duration", time.Since(start).String(), "duration_ms", time.Since(start).Milliseconds(), "cached", f.countCached(), "returned", len(metas), "partial", len(resp.partial)) 484 return metas, resp.partial, nil 485 } 486 487 func (f *MetaFetcher) countCached() int { 488 f.mtx.Lock() 489 defer f.mtx.Unlock() 490 491 return len(f.cached) 492 } 493 494 // BlockIDLabel is a special label that will have an ULID of the meta.json being referenced to. 495 const BlockIDLabel = "__block_id" 496 497 // IgnoreDeletionMarkFilter is a filter that filters out the blocks that are marked for deletion after a given delay. 498 // The delay duration is to make sure that the replacement block can be fetched before we filter out the old block. 499 // Delay is not considered when computing DeletionMarkBlocks map. 500 // Not go-routine safe. 501 type IgnoreDeletionMarkFilter struct { 502 logger log.Logger 503 delay time.Duration 504 concurrency int 505 bkt objstore.BucketReader 506 507 mtx sync.Mutex 508 deletionMarkMap map[ulid.ULID]*DeletionMark 509 } 510 511 // NewIgnoreDeletionMarkFilter creates IgnoreDeletionMarkFilter. 512 func NewIgnoreDeletionMarkFilter(logger log.Logger, bkt objstore.BucketReader, delay time.Duration, concurrency int) *IgnoreDeletionMarkFilter { 513 return &IgnoreDeletionMarkFilter{ 514 logger: logger, 515 bkt: bkt, 516 delay: delay, 517 concurrency: concurrency, 518 } 519 } 520 521 // DeletionMarkBlocks returns block ids that were marked for deletion. 522 func (f *IgnoreDeletionMarkFilter) DeletionMarkBlocks() map[ulid.ULID]*DeletionMark { 523 f.mtx.Lock() 524 defer f.mtx.Unlock() 525 526 deletionMarkMap := make(map[ulid.ULID]*DeletionMark, len(f.deletionMarkMap)) 527 for id, meta := range f.deletionMarkMap { 528 deletionMarkMap[id] = meta 529 } 530 531 return deletionMarkMap 532 } 533 534 // Filter filters out blocks that are marked for deletion after a given delay. 535 // It also returns the blocks that can be deleted since they were uploaded delay duration before current time. 536 func (f *IgnoreDeletionMarkFilter) Filter(ctx context.Context, metas map[ulid.ULID]*Meta, synced GaugeVec) error { 537 deletionMarkMap := make(map[ulid.ULID]*DeletionMark) 538 539 // Make a copy of block IDs to check, in order to avoid concurrency issues 540 // between the scheduler and workers. 541 blockIDs := make([]ulid.ULID, 0, len(metas)) 542 for id := range metas { 543 blockIDs = append(blockIDs, id) 544 } 545 546 var ( 547 eg errgroup.Group 548 ch = make(chan ulid.ULID, f.concurrency) 549 mtx sync.Mutex 550 ) 551 552 for i := 0; i < f.concurrency; i++ { 553 eg.Go(func() error { 554 var lastErr error 555 for id := range ch { 556 m := &DeletionMark{} 557 if err := ReadMarker(ctx, f.logger, f.bkt, id.String(), m); err != nil { 558 if errors.Is(errors.Cause(err), ErrorMarkerNotFound) { 559 continue 560 } 561 if errors.Is(errors.Cause(err), ErrorUnmarshalMarker) { 562 level.Warn(f.logger).Log("msg", "found partial deletion-mark.json; if we will see it happening often for the same block, consider manually deleting deletion-mark.json from the object storage", "block", id, "err", err) 563 continue 564 } 565 // Remember the last error and continue to drain the channel. 566 lastErr = err 567 continue 568 } 569 570 // Keep track of the blocks marked for deletion and filter them out if their 571 // deletion time is greater than the configured delay. 572 mtx.Lock() 573 deletionMarkMap[id] = m 574 if time.Since(time.Unix(m.DeletionTime, 0)).Seconds() > f.delay.Seconds() { 575 synced.WithLabelValues(MarkedForDeletionMeta).Inc() 576 delete(metas, id) 577 } 578 mtx.Unlock() 579 } 580 581 return lastErr 582 }) 583 } 584 585 // Workers scheduled, distribute blocks. 586 eg.Go(func() error { 587 defer close(ch) 588 589 for _, id := range blockIDs { 590 select { 591 case ch <- id: 592 // Nothing to do. 593 case <-ctx.Done(): 594 return ctx.Err() 595 } 596 } 597 598 return nil 599 }) 600 601 if err := eg.Wait(); err != nil { 602 return errors.Wrap(err, "filter blocks marked for deletion") 603 } 604 605 f.mtx.Lock() 606 f.deletionMarkMap = deletionMarkMap 607 f.mtx.Unlock() 608 609 return nil 610 }