github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/storage/tsdb/bucketindex/updater.go (about)

     1  package bucketindex
     2  
     3  import (
     4  	"context"
     5  	"encoding/json"
     6  	"io/ioutil"
     7  	"path"
     8  	"time"
     9  
    10  	"github.com/go-kit/log"
    11  	"github.com/go-kit/log/level"
    12  	"github.com/grafana/dskit/runutil"
    13  	"github.com/oklog/ulid"
    14  	"github.com/pkg/errors"
    15  	"github.com/thanos-io/thanos/pkg/block"
    16  	"github.com/thanos-io/thanos/pkg/block/metadata"
    17  	"github.com/thanos-io/thanos/pkg/objstore"
    18  
    19  	"github.com/cortexproject/cortex/pkg/storage/bucket"
    20  	util_log "github.com/cortexproject/cortex/pkg/util/log"
    21  )
    22  
// Sentinel errors used while building the bucket index.
//
// ErrBlockMetaNotFound and ErrBlockMetaCorrupted are aliases of the Thanos
// block.ErrorSyncMetaNotFound and block.ErrorSyncMetaCorrupted values, so
// errors.Is matches against either name. ErrBlockDeletionMarkNotFound and
// ErrBlockDeletionMarkCorrupted are defined by this package and are used to
// wrap the Thanos marker errors in updateBlockDeletionMarkIndexEntry.
var (
	ErrBlockMetaNotFound          = block.ErrorSyncMetaNotFound
	ErrBlockMetaCorrupted         = block.ErrorSyncMetaCorrupted
	ErrBlockDeletionMarkNotFound  = errors.New("block deletion mark not found")
	ErrBlockDeletionMarkCorrupted = errors.New("block deletion mark corrupted")
)
    29  
// Updater is responsible for generating an updated in-memory bucket index.
//
// It holds the bucket client used to list and read blocks and deletion marks,
// and the logger used to report skipped entries.
type Updater struct {
	bkt    objstore.InstrumentedBucket
	logger log.Logger
}
    35  
    36  func NewUpdater(bkt objstore.Bucket, userID string, cfgProvider bucket.TenantConfigProvider, logger log.Logger) *Updater {
    37  	return &Updater{
    38  		bkt:    bucket.NewUserBucketClient(userID, bkt, cfgProvider),
    39  		logger: util_log.WithUserID(userID, logger),
    40  	}
    41  }
    42  
    43  // UpdateIndex generates the bucket index and returns it, without storing it to the storage.
    44  // If the old index is not passed in input, then the bucket index will be generated from scratch.
    45  func (w *Updater) UpdateIndex(ctx context.Context, old *Index) (*Index, map[ulid.ULID]error, error) {
    46  	var oldBlocks []*Block
    47  	var oldBlockDeletionMarks []*BlockDeletionMark
    48  
    49  	// Read the old index, if provided.
    50  	if old != nil {
    51  		oldBlocks = old.Blocks
    52  		oldBlockDeletionMarks = old.BlockDeletionMarks
    53  	}
    54  
    55  	blocks, partials, err := w.updateBlocks(ctx, oldBlocks)
    56  	if err != nil {
    57  		return nil, nil, err
    58  	}
    59  
    60  	blockDeletionMarks, err := w.updateBlockDeletionMarks(ctx, oldBlockDeletionMarks)
    61  	if err != nil {
    62  		return nil, nil, err
    63  	}
    64  
    65  	return &Index{
    66  		Version:            IndexVersion1,
    67  		Blocks:             blocks,
    68  		BlockDeletionMarks: blockDeletionMarks,
    69  		UpdatedAt:          time.Now().Unix(),
    70  	}, partials, nil
    71  }
    72  
    73  func (w *Updater) updateBlocks(ctx context.Context, old []*Block) (blocks []*Block, partials map[ulid.ULID]error, _ error) {
    74  	discovered := map[ulid.ULID]struct{}{}
    75  	partials = map[ulid.ULID]error{}
    76  
    77  	// Find all blocks in the storage.
    78  	err := w.bkt.Iter(ctx, "", func(name string) error {
    79  		if id, ok := block.IsBlockDir(name); ok {
    80  			discovered[id] = struct{}{}
    81  		}
    82  		return nil
    83  	})
    84  	if err != nil {
    85  		return nil, nil, errors.Wrap(err, "list blocks")
    86  	}
    87  
    88  	// Since blocks are immutable, all blocks already existing in the index can just be copied.
    89  	for _, b := range old {
    90  		if _, ok := discovered[b.ID]; ok {
    91  			blocks = append(blocks, b)
    92  			delete(discovered, b.ID)
    93  		}
    94  	}
    95  
    96  	// Remaining blocks are new ones and we have to fetch the meta.json for each of them, in order
    97  	// to find out if their upload has been completed (meta.json is uploaded last) and get the block
    98  	// information to store in the bucket index.
    99  	for id := range discovered {
   100  		b, err := w.updateBlockIndexEntry(ctx, id)
   101  		if err == nil {
   102  			blocks = append(blocks, b)
   103  			continue
   104  		}
   105  
   106  		if errors.Is(err, ErrBlockMetaNotFound) {
   107  			partials[id] = err
   108  			level.Warn(w.logger).Log("msg", "skipped partial block when updating bucket index", "block", id.String())
   109  			continue
   110  		}
   111  		if errors.Is(err, ErrBlockMetaCorrupted) {
   112  			partials[id] = err
   113  			level.Error(w.logger).Log("msg", "skipped block with corrupted meta.json when updating bucket index", "block", id.String(), "err", err)
   114  			continue
   115  		}
   116  		return nil, nil, err
   117  	}
   118  
   119  	return blocks, partials, nil
   120  }
   121  
   122  func (w *Updater) updateBlockIndexEntry(ctx context.Context, id ulid.ULID) (*Block, error) {
   123  	metaFile := path.Join(id.String(), block.MetaFilename)
   124  
   125  	// Get the block's meta.json file.
   126  	r, err := w.bkt.Get(ctx, metaFile)
   127  	if w.bkt.IsObjNotFoundErr(err) {
   128  		return nil, ErrBlockMetaNotFound
   129  	}
   130  	if err != nil {
   131  		return nil, errors.Wrapf(err, "get block meta file: %v", metaFile)
   132  	}
   133  	defer runutil.CloseWithLogOnErr(w.logger, r, "close get block meta file")
   134  
   135  	metaContent, err := ioutil.ReadAll(r)
   136  	if err != nil {
   137  		return nil, errors.Wrapf(err, "read block meta file: %v", metaFile)
   138  	}
   139  
   140  	// Unmarshal it.
   141  	m := metadata.Meta{}
   142  	if err := json.Unmarshal(metaContent, &m); err != nil {
   143  		return nil, errors.Wrapf(ErrBlockMetaCorrupted, "unmarshal block meta file %s: %v", metaFile, err)
   144  	}
   145  
   146  	if m.Version != metadata.TSDBVersion1 {
   147  		return nil, errors.Errorf("unexpected block meta version: %s version: %d", metaFile, m.Version)
   148  	}
   149  
   150  	block := BlockFromThanosMeta(m)
   151  
   152  	// Get the meta.json attributes.
   153  	attrs, err := w.bkt.Attributes(ctx, metaFile)
   154  	if err != nil {
   155  		return nil, errors.Wrapf(err, "read meta file attributes: %v", metaFile)
   156  	}
   157  
   158  	// Since the meta.json file is the last file of a block being uploaded and it's immutable
   159  	// we can safely assume that the last modified timestamp of the meta.json is the time when
   160  	// the block has completed to be uploaded.
   161  	block.UploadedAt = attrs.LastModified.Unix()
   162  
   163  	return block, nil
   164  }
   165  
   166  func (w *Updater) updateBlockDeletionMarks(ctx context.Context, old []*BlockDeletionMark) ([]*BlockDeletionMark, error) {
   167  	out := make([]*BlockDeletionMark, 0, len(old))
   168  	discovered := map[ulid.ULID]struct{}{}
   169  
   170  	// Find all markers in the storage.
   171  	err := w.bkt.Iter(ctx, MarkersPathname+"/", func(name string) error {
   172  		if blockID, ok := IsBlockDeletionMarkFilename(path.Base(name)); ok {
   173  			discovered[blockID] = struct{}{}
   174  		}
   175  		return nil
   176  	})
   177  	if err != nil {
   178  		return nil, errors.Wrap(err, "list block deletion marks")
   179  	}
   180  
   181  	// Since deletion marks are immutable, all markers already existing in the index can just be copied.
   182  	for _, m := range old {
   183  		if _, ok := discovered[m.ID]; ok {
   184  			out = append(out, m)
   185  			delete(discovered, m.ID)
   186  		}
   187  	}
   188  
   189  	// Remaining markers are new ones and we have to fetch them.
   190  	for id := range discovered {
   191  		m, err := w.updateBlockDeletionMarkIndexEntry(ctx, id)
   192  		if errors.Is(err, ErrBlockDeletionMarkNotFound) {
   193  			// This could happen if the block is permanently deleted between the "list objects" and now.
   194  			level.Warn(w.logger).Log("msg", "skipped missing block deletion mark when updating bucket index", "block", id.String())
   195  			continue
   196  		}
   197  		if errors.Is(err, ErrBlockDeletionMarkCorrupted) {
   198  			level.Error(w.logger).Log("msg", "skipped corrupted block deletion mark when updating bucket index", "block", id.String(), "err", err)
   199  			continue
   200  		}
   201  		if err != nil {
   202  			return nil, err
   203  		}
   204  
   205  		out = append(out, m)
   206  	}
   207  
   208  	return out, nil
   209  }
   210  
   211  func (w *Updater) updateBlockDeletionMarkIndexEntry(ctx context.Context, id ulid.ULID) (*BlockDeletionMark, error) {
   212  	m := metadata.DeletionMark{}
   213  
   214  	if err := metadata.ReadMarker(ctx, w.logger, w.bkt, id.String(), &m); err != nil {
   215  		if errors.Is(err, metadata.ErrorMarkerNotFound) {
   216  			return nil, errors.Wrap(ErrBlockDeletionMarkNotFound, err.Error())
   217  		}
   218  		if errors.Is(err, metadata.ErrorUnmarshalMarker) {
   219  			return nil, errors.Wrap(ErrBlockDeletionMarkCorrupted, err.Error())
   220  		}
   221  		return nil, err
   222  	}
   223  
   224  	return BlockDeletionMarkFromThanosMarker(&m), nil
   225  }