github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/bucketindex/updater.go (about)

     1  // SPDX-License-Identifier: AGPL-3.0-only
     2  // Provenance-includes-location: https://github.com/cortexproject/cortex/blob/master/pkg/storage/tsdb/bucketindex/updater.go
     3  // Provenance-includes-license: Apache-2.0
     4  // Provenance-includes-copyright: The Cortex Authors.
     5  
     6  package bucketindex
     7  
     8  import (
     9  	"context"
    10  	"encoding/json"
    11  	"io"
    12  	"path"
    13  	"time"
    14  
    15  	"github.com/go-kit/log"
    16  	"github.com/go-kit/log/level"
    17  	"github.com/grafana/dskit/runutil"
    18  	"github.com/oklog/ulid/v2"
    19  	"github.com/pkg/errors"
    20  
    21  	"github.com/grafana/pyroscope/pkg/objstore"
    22  	"github.com/grafana/pyroscope/pkg/phlaredb/block"
    23  )
    24  
var (
	// ErrBlockMetaNotFound is an alias of block.ErrorSyncMetaNotFound. The updater
	// returns it when the meta.json of a block does not exist in the bucket.
	ErrBlockMetaNotFound = block.ErrorSyncMetaNotFound
	// ErrBlockMetaCorrupted is an alias of block.ErrorSyncMetaCorrupted. The updater
	// wraps it when the meta.json of a block cannot be unmarshalled.
	ErrBlockMetaCorrupted = block.ErrorSyncMetaCorrupted
	// ErrBlockDeletionMarkNotFound wraps block.ErrorMarkerNotFound reported while
	// reading a block deletion mark.
	ErrBlockDeletionMarkNotFound = errors.New("block deletion mark not found")
	// ErrBlockDeletionMarkCorrupted wraps block.ErrorUnmarshalMarker reported while
	// reading a block deletion mark.
	ErrBlockDeletionMarkCorrupted = errors.New("block deletion mark corrupted")
)
    31  
// Updater is responsible for generating an updated in-memory bucket index
// from the blocks and block deletion marks found in the bucket.
type Updater struct {
	// bkt is the bucket client the index is built from; NewUpdater scopes it
	// to a single user via objstore.NewTenantBucketClient.
	bkt objstore.InstrumentedBucket
	// logger receives progress and skipped-block messages.
	logger log.Logger
}
    37  
    38  func NewUpdater(bkt objstore.Bucket, userID string, cfgProvider objstore.TenantConfigProvider, logger log.Logger) *Updater {
    39  	return &Updater{
    40  		bkt:    objstore.NewTenantBucketClient(userID, bkt, cfgProvider),
    41  		logger: logger,
    42  	}
    43  }
    44  
    45  // UpdateIndex generates the bucket index and returns it, without storing it to the storage.
    46  // If the old index is not passed in input, then the bucket index will be generated from scratch.
    47  func (w *Updater) UpdateIndex(ctx context.Context, old *Index) (*Index, map[ulid.ULID]error, error) {
    48  	var oldBlocks []*Block
    49  	var oldBlockDeletionMarks []*BlockDeletionMark
    50  
    51  	// Use the old index if provided, and it is using the latest version format.
    52  	if old != nil && old.Version == IndexVersion3 {
    53  		oldBlocks = old.Blocks
    54  		oldBlockDeletionMarks = old.BlockDeletionMarks
    55  	}
    56  
    57  	blocks, partials, err := w.updateBlocks(ctx, oldBlocks)
    58  	if err != nil {
    59  		return nil, nil, err
    60  	}
    61  
    62  	blockDeletionMarks, err := w.updateBlockDeletionMarks(ctx, oldBlockDeletionMarks)
    63  	if err != nil {
    64  		return nil, nil, err
    65  	}
    66  
    67  	return &Index{
    68  		Version:            IndexVersion3,
    69  		Blocks:             blocks,
    70  		BlockDeletionMarks: blockDeletionMarks,
    71  		UpdatedAt:          time.Now().Unix(),
    72  	}, partials, nil
    73  }
    74  
    75  func (w *Updater) updateBlocks(ctx context.Context, old []*Block) (blocks []*Block, partials map[ulid.ULID]error, _ error) {
    76  	discovered := map[ulid.ULID]struct{}{}
    77  	partials = map[ulid.ULID]error{}
    78  
    79  	// Find all blocks in the storage.
    80  	err := w.bkt.Iter(ctx, "", func(name string) error {
    81  		if id, ok := block.IsBlockDir(name); ok {
    82  			discovered[id] = struct{}{}
    83  		}
    84  		return nil
    85  	})
    86  	if err != nil {
    87  		return nil, nil, errors.Wrap(err, "list blocks")
    88  	}
    89  
    90  	// Since blocks are immutable, all blocks already existing in the index can just be copied.
    91  	for _, b := range old {
    92  		if _, ok := discovered[b.ID]; ok {
    93  			blocks = append(blocks, b)
    94  			delete(discovered, b.ID)
    95  		}
    96  	}
    97  
    98  	level.Info(w.logger).Log("msg", "listed all blocks in storage", "newly_discovered", len(discovered), "existing", len(old))
    99  
   100  	// Remaining blocks are new ones and we have to fetch the meta.json for each of them, in order
   101  	// to find out if their upload has been completed (meta.json is uploaded last) and get the block
   102  	// information to store in the bucket index.
   103  	for id := range discovered {
   104  		b, err := w.updateBlockIndexEntry(ctx, id)
   105  		if err == nil {
   106  			blocks = append(blocks, b)
   107  			continue
   108  		}
   109  
   110  		if errors.Is(err, ErrBlockMetaNotFound) {
   111  			partials[id] = err
   112  			level.Warn(w.logger).Log("msg", "skipped partial block when updating bucket index", "block", id.String())
   113  			continue
   114  		}
   115  		if errors.Is(err, ErrBlockMetaCorrupted) {
   116  			partials[id] = err
   117  			level.Error(w.logger).Log("msg", "skipped block with corrupted meta.json when updating bucket index", "block", id.String(), "err", err)
   118  			continue
   119  		}
   120  		return nil, nil, err
   121  	}
   122  	level.Info(w.logger).Log("msg", "fetched blocks metas for newly discovered blocks", "total_blocks", len(blocks), "partial_errors", len(partials))
   123  
   124  	return blocks, partials, nil
   125  }
   126  
   127  func (w *Updater) updateBlockIndexEntry(ctx context.Context, id ulid.ULID) (*Block, error) {
   128  	// Set a generous timeout for fetching the meta.json and getting the attributes of the same file.
   129  	// This protects against operations that can take unbounded time.
   130  	ctx, cancel := context.WithTimeout(ctx, time.Minute)
   131  	defer cancel()
   132  
   133  	metaFile := path.Join(id.String(), block.MetaFilename)
   134  
   135  	// Get the block's meta.json file.
   136  	r, err := w.bkt.Get(ctx, metaFile)
   137  	if w.bkt.IsObjNotFoundErr(err) {
   138  		return nil, ErrBlockMetaNotFound
   139  	}
   140  	if err != nil {
   141  		return nil, errors.Wrapf(err, "get block meta file: %v", metaFile)
   142  	}
   143  	defer runutil.CloseWithLogOnErr(w.logger, r, "close get block meta file")
   144  
   145  	metaContent, err := io.ReadAll(r)
   146  	if err != nil {
   147  		return nil, errors.Wrapf(err, "read block meta file: %v", metaFile)
   148  	}
   149  
   150  	// Unmarshal it.
   151  	m := block.Meta{}
   152  	if err := json.Unmarshal(metaContent, &m); err != nil {
   153  		return nil, errors.Wrapf(ErrBlockMetaCorrupted, "unmarshal block meta file %s: %v", metaFile, err)
   154  	}
   155  
   156  	if !m.Version.IsValid() {
   157  		return nil, errors.Errorf("unexpected block meta version: %s version: %d", metaFile, m.Version)
   158  	}
   159  
   160  	block := BlockFromMeta(m)
   161  
   162  	// Get the meta.json attributes.
   163  	attrs, err := w.bkt.Attributes(ctx, metaFile)
   164  	if err != nil {
   165  		return nil, errors.Wrapf(err, "read meta file attributes: %v", metaFile)
   166  	}
   167  
   168  	// Since the meta.json file is the last file of a block being uploaded and it's immutable
   169  	// we can safely assume that the last modified timestamp of the meta.json is the time when
   170  	// the block has completed to be uploaded.
   171  	block.UploadedAt = attrs.LastModified.Unix()
   172  
   173  	return block, nil
   174  }
   175  
   176  func (w *Updater) updateBlockDeletionMarks(ctx context.Context, old []*BlockDeletionMark) ([]*BlockDeletionMark, error) {
   177  	out := make([]*BlockDeletionMark, 0, len(old))
   178  
   179  	// Find all markers in the storage.
   180  	discovered, err := block.ListBlockDeletionMarks(ctx, w.bkt)
   181  	if err != nil {
   182  		return nil, err
   183  	}
   184  
   185  	level.Info(w.logger).Log("msg", "listed deletion markers", "count", len(discovered))
   186  
   187  	// Since deletion marks are immutable, all markers already existing in the index can just be copied.
   188  	for _, m := range old {
   189  		if _, ok := discovered[m.ID]; ok {
   190  			out = append(out, m)
   191  			delete(discovered, m.ID)
   192  		}
   193  	}
   194  
   195  	// Remaining markers are new ones and we have to fetch them.
   196  	for id := range discovered {
   197  		m, err := w.updateBlockDeletionMarkIndexEntry(ctx, id)
   198  		if errors.Is(err, ErrBlockDeletionMarkNotFound) {
   199  			// This could happen if the block is permanently deleted between the "list objects" and now.
   200  			level.Warn(w.logger).Log("msg", "skipped missing block deletion mark when updating bucket index", "block", id.String())
   201  			continue
   202  		}
   203  		if errors.Is(err, ErrBlockDeletionMarkCorrupted) {
   204  			level.Error(w.logger).Log("msg", "skipped corrupted block deletion mark when updating bucket index", "block", id.String(), "err", err)
   205  			continue
   206  		}
   207  		if err != nil {
   208  			return nil, err
   209  		}
   210  
   211  		out = append(out, m)
   212  	}
   213  
   214  	level.Info(w.logger).Log("msg", "updated deletion markers for recently marked blocks", "count", len(discovered), "total_deletion_markers", len(out))
   215  
   216  	return out, nil
   217  }
   218  
   219  func (w *Updater) updateBlockDeletionMarkIndexEntry(ctx context.Context, id ulid.ULID) (*BlockDeletionMark, error) {
   220  	m := block.DeletionMark{}
   221  
   222  	if err := block.ReadMarker(ctx, w.logger, w.bkt, id.String(), &m); err != nil {
   223  		if errors.Is(err, block.ErrorMarkerNotFound) {
   224  			return nil, errors.Wrap(ErrBlockDeletionMarkNotFound, err.Error())
   225  		}
   226  		if errors.Is(err, block.ErrorUnmarshalMarker) {
   227  			return nil, errors.Wrap(ErrBlockDeletionMarkCorrupted, err.Error())
   228  		}
   229  		return nil, err
   230  	}
   231  
   232  	return DeletionMarkFromBlockMarker(&m), nil
   233  }