github.com/thanos-io/thanos@v0.32.5/pkg/shipper/shipper.go

     1  // Copyright (c) The Thanos Authors.
     2  // Licensed under the Apache License 2.0.
     3  
     4  // Package shipper detects new TSDB block directories on the local file system
     5  // and uploads them to block storage.
     6  package shipper
     7  
     8  import (
     9  	"context"
    10  	"encoding/json"
    11  	"math"
    12  	"os"
    13  	"path"
    14  	"path/filepath"
    15  	"sort"
    16  	"sync"
    17  
    18  	"github.com/go-kit/log"
    19  	"github.com/go-kit/log/level"
    20  	"github.com/oklog/ulid"
    21  	"github.com/pkg/errors"
    22  	"github.com/prometheus/client_golang/prometheus"
    23  	"github.com/prometheus/client_golang/prometheus/promauto"
    24  	"github.com/prometheus/prometheus/model/labels"
    25  	"github.com/prometheus/prometheus/tsdb"
    26  	"github.com/prometheus/prometheus/tsdb/fileutil"
    27  
    28  	"github.com/thanos-io/objstore"
    29  
    30  	"github.com/thanos-io/thanos/pkg/block"
    31  	"github.com/thanos-io/thanos/pkg/block/metadata"
    32  	"github.com/thanos-io/thanos/pkg/runutil"
    33  )
    34  
    35  type metrics struct {
    36  	dirSyncs          prometheus.Counter
    37  	dirSyncFailures   prometheus.Counter
    38  	uploads           prometheus.Counter
    39  	uploadFailures    prometheus.Counter
    40  	uploadedCompacted prometheus.Gauge
    41  }
    42  
    43  func newMetrics(reg prometheus.Registerer) *metrics {
    44  	var m metrics
    45  
    46  	m.dirSyncs = promauto.With(reg).NewCounter(prometheus.CounterOpts{
    47  		Name: "thanos_shipper_dir_syncs_total",
    48  		Help: "Total number of dir syncs",
    49  	})
    50  	m.dirSyncFailures = promauto.With(reg).NewCounter(prometheus.CounterOpts{
    51  		Name: "thanos_shipper_dir_sync_failures_total",
    52  		Help: "Total number of failed dir syncs",
    53  	})
    54  	m.uploads = promauto.With(reg).NewCounter(prometheus.CounterOpts{
    55  		Name: "thanos_shipper_uploads_total",
    56  		Help: "Total number of uploaded blocks",
    57  	})
    58  	m.uploadFailures = promauto.With(reg).NewCounter(prometheus.CounterOpts{
    59  		Name: "thanos_shipper_upload_failures_total",
    60  		Help: "Total number of block upload failures",
    61  	})
    62  	m.uploadedCompacted = promauto.With(reg).NewGauge(prometheus.GaugeOpts{
    63  		Name: "thanos_shipper_upload_compacted_done",
    64  		Help: "If 1 it means shipper uploaded all compacted blocks from the filesystem.",
    65  	})
    66  	return &m
    67  }
    68  
    69  // Shipper watches a directory for matching files and directories and uploads
    70  // them to a remote data store.
    71  type Shipper struct {
    72  	logger  log.Logger
    73  	dir     string
    74  	metrics *metrics
    75  	bucket  objstore.Bucket
    76  	source  metadata.SourceType
    77  
    78  	uploadCompactedFunc    func() bool
    79  	allowOutOfOrderUploads bool
    80  	hashFunc               metadata.HashFunc
    81  
    82  	labels func() labels.Labels
    83  	mtx    sync.RWMutex
    84  }
    85  
    86  // New creates a new shipper that detects new TSDB blocks in dir and uploads them to
    87  // remote if necessary. It attaches the Thanos metadata section to each block's meta JSON file.
    88  // If uploadCompactedFunc returns true, it also uploads compacted blocks that are already present on the filesystem.
    89  func New(
    90  	logger log.Logger,
    91  	r prometheus.Registerer,
    92  	dir string,
    93  	bucket objstore.Bucket,
    94  	lbls func() labels.Labels,
    95  	source metadata.SourceType,
    96  	uploadCompactedFunc func() bool,
    97  	allowOutOfOrderUploads bool,
    98  	hashFunc metadata.HashFunc,
    99  ) *Shipper {
   100  	if logger == nil {
   101  		logger = log.NewNopLogger()
   102  	}
   103  	if lbls == nil {
   104  		lbls = func() labels.Labels { return nil }
   105  	}
   106  
   107  	if uploadCompactedFunc == nil {
   108  		uploadCompactedFunc = func() bool {
   109  			return false
   110  		}
   111  	}
   112  	return &Shipper{
   113  		logger:                 logger,
   114  		dir:                    dir,
   115  		bucket:                 bucket,
   116  		labels:                 lbls,
   117  		metrics:                newMetrics(r),
   118  		source:                 source,
   119  		allowOutOfOrderUploads: allowOutOfOrderUploads,
   120  		uploadCompactedFunc:    uploadCompactedFunc,
   121  		hashFunc:               hashFunc,
   122  	}
   123  }
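        // A minimal construction sketch (illustrative only; logger, reg, bkt, and the
        // label set are assumed to be provided by the caller, and the concrete values
        // below are not prescriptive):
        //
        //	s := shipper.New(
        //		logger,  // log.Logger; nil falls back to a no-op logger
        //		reg,     // prometheus.Registerer
        //		"data/", // TSDB directory to watch for new blocks
        //		bkt,     // objstore.Bucket to upload to
        //		func() labels.Labels { return labels.FromStrings("replica", "a") },
        //		metadata.SidecarSource,
        //		nil,   // uploadCompactedFunc: nil means compacted blocks are never uploaded
        //		false, // allowOutOfOrderUploads
        //		metadata.NoneFunc, // hashFunc: skip hash calculation for uploaded files
        //	)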
   124  
   125  func (s *Shipper) SetLabels(lbls labels.Labels) {
   126  	s.mtx.Lock()
   127  	defer s.mtx.Unlock()
   128  
   129  	s.labels = func() labels.Labels { return lbls }
   130  }
   131  
   132  func (s *Shipper) getLabels() labels.Labels {
   133  	s.mtx.RLock()
   134  	defer s.mtx.RUnlock()
   135  
   136  	return s.labels()
   137  }
   138  
   139  // Timestamps returns the minimum timestamp for which data is available and the highest timestamp
   140  // of blocks that were successfully uploaded.
   141  func (s *Shipper) Timestamps() (minTime, maxSyncTime int64, err error) {
   142  	meta, err := ReadMetaFile(s.dir)
   143  	if err != nil {
   144  		return 0, 0, errors.Wrap(err, "read shipper meta file")
   145  	}
   146  	// Build a map of blocks we already uploaded.
   147  	hasUploaded := make(map[ulid.ULID]struct{}, len(meta.Uploaded))
   148  	for _, id := range meta.Uploaded {
   149  		hasUploaded[id] = struct{}{}
   150  	}
   151  
   152  	minTime = math.MaxInt64
   153  	maxSyncTime = math.MinInt64
   154  
   155  	metas, err := s.blockMetasFromOldest()
   156  	if err != nil {
   157  		return 0, 0, err
   158  	}
   159  	for _, m := range metas {
   160  		if m.MinTime < minTime {
   161  			minTime = m.MinTime
   162  		}
   163  		if _, ok := hasUploaded[m.ULID]; ok && m.MaxTime > maxSyncTime {
   164  			maxSyncTime = m.MaxTime
   165  		}
   166  	}
   167  
   168  	if minTime == math.MaxInt64 {
   169  		// No blocks found yet. We cannot assume any minimum block size, so propagate 0 as minTime.
   170  		minTime = 0
   171  	}
   172  	return minTime, maxSyncTime, nil
   173  }
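        // As a worked illustration of the semantics above (block ranges are made up):
        // with two local blocks covering [1000, 2000) and [2000, 3000), where only the
        // first has been uploaded, Timestamps returns minTime=1000 and maxSyncTime=2000.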
   174  
   175  type lazyOverlapChecker struct {
   176  	synced bool
   177  	logger log.Logger
   178  	bucket objstore.Bucket
   179  	labels func() labels.Labels
   180  
   181  	metas       []tsdb.BlockMeta
   182  	lookupMetas map[ulid.ULID]struct{}
   183  }
   184  
   185  func newLazyOverlapChecker(logger log.Logger, bucket objstore.Bucket, labels func() labels.Labels) *lazyOverlapChecker {
   186  	return &lazyOverlapChecker{
   187  		logger: logger,
   188  		bucket: bucket,
   189  		labels: labels,
   190  
   191  		lookupMetas: map[ulid.ULID]struct{}{},
   192  	}
   193  }
   194  
   195  func (c *lazyOverlapChecker) sync(ctx context.Context) error {
   196  	if err := c.bucket.Iter(ctx, "", func(path string) error {
   197  		id, ok := block.IsBlockDir(path)
   198  		if !ok {
   199  			return nil
   200  		}
   201  
   202  		m, err := block.DownloadMeta(ctx, c.logger, c.bucket, id)
   203  		if err != nil {
   204  			return err
   205  		}
   206  
   207  		if !labels.Equal(labels.FromMap(m.Thanos.Labels), c.labels()) {
   208  			return nil
   209  		}
   210  
   211  		c.metas = append(c.metas, m.BlockMeta)
   212  		c.lookupMetas[m.ULID] = struct{}{}
   213  		return nil
   214  
   215  	}); err != nil {
   216  		return errors.Wrap(err, "get all block meta.")
   217  	}
   218  
   219  	c.synced = true
   220  	return nil
   221  }
   222  
   223  func (c *lazyOverlapChecker) IsOverlapping(ctx context.Context, newMeta tsdb.BlockMeta) error {
   224  	if !c.synced {
   225  		level.Info(c.logger).Log("msg", "gathering all existing blocks from the remote bucket for check", "id", newMeta.ULID.String())
   226  		if err := c.sync(ctx); err != nil {
   227  			return err
   228  		}
   229  	}
   230  
   231  	// TODO(bwplotka) so confusing! we need to sort it first. Add comment to TSDB code.
   232  	metas := append([]tsdb.BlockMeta{newMeta}, c.metas...)
   233  	sort.Slice(metas, func(i, j int) bool {
   234  		return metas[i].MinTime < metas[j].MinTime
   235  	})
   236  	if o := tsdb.OverlappingBlocks(metas); len(o) > 0 {
   237  		// TODO(bwplotka): Consider checking if overlaps relates to block in concern?
   238  		return errors.Errorf("shipping compacted block %s is blocked; overlap spotted: %s", newMeta.ULID, o.String())
   239  	}
   240  	return nil
   241  }
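        // For intuition (times are made up): tsdb.OverlappingBlocks expects the metas
        // to be sorted by MinTime, which is why they are sorted above. Given one block
        // covering [0, 20) and another covering [10, 30), it would report an overlap
        // around [10, 20), and the compacted block upload would be refused.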
   242  
   243  // Sync performs a single synchronization, which ensures all non-compacted local blocks have been uploaded
   244  // to the object bucket once.
   245  //
   246  // Blocks that were already uploaded (recorded in the local meta file or already present in the bucket) are skipped.
   247  //
   248  // It is not concurrency-safe, however it is compactor-safe (running concurrently with the compactor is ok).
   249  func (s *Shipper) Sync(ctx context.Context) (uploaded int, err error) {
   250  	meta, err := ReadMetaFile(s.dir)
   251  	if err != nil {
   252  		// If we encounter any error, proceed with an empty meta file and overwrite it later.
   253  		// The meta file is only used to avoid unnecessary bucket.Exists calls,
   254  		// which are handled properly by the system if they occur anyway.
   255  		if !os.IsNotExist(err) {
   256  			level.Warn(s.logger).Log("msg", "reading meta file failed, will override it", "err", err)
   257  		}
   258  		meta = &Meta{Version: MetaVersion1}
   259  	}
   260  
   261  	// Build a map of blocks we already uploaded.
   262  	hasUploaded := make(map[ulid.ULID]struct{}, len(meta.Uploaded))
   263  	for _, id := range meta.Uploaded {
   264  		hasUploaded[id] = struct{}{}
   265  	}
   266  
   267  	// Reset the uploaded slice so we can rebuild it only with blocks that still exist locally.
   268  	meta.Uploaded = nil
   269  
   270  	var (
   271  		checker    = newLazyOverlapChecker(s.logger, s.bucket, s.getLabels)
   272  		uploadErrs int
   273  	)
   274  
   275  	uploadCompacted := s.uploadCompactedFunc()
   276  	metas, err := s.blockMetasFromOldest()
   277  	if err != nil {
   278  		return 0, err
   279  	}
   280  	for _, m := range metas {
   281  		// Do not sync a block if we already uploaded or ignored it. If it's no longer found in the bucket,
   282  		// it was generally removed by the compaction process.
   283  		if _, uploaded := hasUploaded[m.ULID]; uploaded {
   284  			meta.Uploaded = append(meta.Uploaded, m.ULID)
   285  			continue
   286  		}
   287  
   288  		if m.Stats.NumSamples == 0 {
   289  			// Ignore empty blocks.
   290  			level.Debug(s.logger).Log("msg", "ignoring empty block", "block", m.ULID)
   291  			continue
   292  		}
   293  
   294  		// In the normal flow we only ship blocks at the first compaction level.
   295  		if m.Compaction.Level > 1 {
   296  			if !uploadCompacted {
   297  				continue
   298  			}
   299  		}
   300  
   301  		// Check against bucket if the meta file for this block exists.
   302  		ok, err := s.bucket.Exists(ctx, path.Join(m.ULID.String(), block.MetaFilename))
   303  		if err != nil {
   304  			return 0, errors.Wrap(err, "check exists")
   305  		}
   306  		if ok {
   307  			meta.Uploaded = append(meta.Uploaded, m.ULID)
   308  			continue
   309  		}
   310  
   311  		// Skip the overlap check if out-of-order uploads are enabled.
   312  		if m.Compaction.Level > 1 && !s.allowOutOfOrderUploads {
   313  			if err := checker.IsOverlapping(ctx, m.BlockMeta); err != nil {
   314  				return 0, errors.Errorf("Found overlap or error during sync, cannot upload compacted block, details: %v", err)
   315  			}
   316  		}
   317  
   318  		if err := s.upload(ctx, m); err != nil {
   319  			if !s.allowOutOfOrderUploads {
   320  				return 0, errors.Wrapf(err, "upload %v", m.ULID)
   321  			}
   322  
   323  			// No error is returned here, only a log line, because we want the remaining blocks
   324  			// to be uploaded even though this one failed. It will be retried on the next Sync iteration.
   325  			level.Error(s.logger).Log("msg", "shipping failed", "block", m.ULID, "err", err)
   326  			uploadErrs++
   327  			continue
   328  		}
   329  		meta.Uploaded = append(meta.Uploaded, m.ULID)
   330  		uploaded++
   331  		s.metrics.uploads.Inc()
   332  	}
   333  	if err := WriteMetaFile(s.logger, s.dir, meta); err != nil {
   334  		level.Warn(s.logger).Log("msg", "updating meta file failed", "err", err)
   335  	}
   336  
   337  	s.metrics.dirSyncs.Inc()
   338  	if uploadErrs > 0 {
   339  		s.metrics.uploadFailures.Add(float64(uploadErrs))
   340  		return uploaded, errors.Errorf("failed to sync %v blocks", uploadErrs)
   341  	}
   342  
   343  	if uploadCompacted {
   344  		s.metrics.uploadedCompacted.Set(1)
   345  	} else {
   346  		s.metrics.uploadedCompacted.Set(0)
   347  	}
   348  	return uploaded, nil
   349  }
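        // Callers typically run Sync periodically rather than once. A minimal sketch,
        // assuming a caller-defined context ctx, stop channel stopc, and a 30s interval
        // (none of which are prescribed by this package):
        //
        //	_ = runutil.Repeat(30*time.Second, stopc, func() error {
        //		if uploaded, err := s.Sync(ctx); err != nil {
        //			level.Warn(logger).Log("msg", "shipper sync failed", "uploaded", uploaded, "err", err)
        //		}
        //		return nil
        //	})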
   350  
   351  // upload uploads the block to remote storage if it does not already exist there.
   352  // TODO(khyatisoneji): Double check if block does not have deletion-mark.json for some reason, otherwise log it or return error.
   353  func (s *Shipper) upload(ctx context.Context, meta *metadata.Meta) error {
   354  	level.Info(s.logger).Log("msg", "upload new block", "id", meta.ULID)
   355  
   356  	// We hard-link the files into a temporary upload directory so we are not affected
   357  	// by other operations happening against the TSDB directory.
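        	// Hard links only work within a single filesystem, which is why the temporary
        	// upload directory is created under s.dir, next to the TSDB blocks themselves.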
   358  	updir := filepath.Join(s.dir, "thanos", "upload", meta.ULID.String())
   359  
   360  	// Remove updir just in case.
   361  	if err := os.RemoveAll(updir); err != nil {
   362  		return errors.Wrap(err, "clean upload directory")
   363  	}
   364  	if err := os.MkdirAll(updir, 0750); err != nil {
   365  		return errors.Wrap(err, "create upload dir")
   366  	}
   367  	defer func() {
   368  		if err := os.RemoveAll(updir); err != nil {
   369  			level.Error(s.logger).Log("msg", "failed to clean upload directory", "err", err)
   370  		}
   371  	}()
   372  
   373  	dir := filepath.Join(s.dir, meta.ULID.String())
   374  	if err := hardlinkBlock(dir, updir); err != nil {
   375  		return errors.Wrap(err, "hard link block")
   376  	}
   377  	// Attach current labels and write a new meta file with Thanos extensions.
   378  	if lset := s.getLabels(); lset != nil {
   379  		meta.Thanos.Labels = lset.Map()
   380  	}
   381  	meta.Thanos.Source = s.source
   382  	meta.Thanos.SegmentFiles = block.GetSegmentFiles(updir)
   383  	if err := meta.WriteToDir(s.logger, updir); err != nil {
   384  		return errors.Wrap(err, "write meta file")
   385  	}
   386  	return block.Upload(ctx, s.logger, s.bucket, updir, s.hashFunc)
   387  }
   388  
   389  // blockMetasFromOldest returns the block metas of all blocks found in dir,
   390  // sorted by minTime in ascending order.
   391  func (s *Shipper) blockMetasFromOldest() (metas []*metadata.Meta, _ error) {
   392  	fis, err := os.ReadDir(s.dir)
   393  	if err != nil {
   394  		return nil, errors.Wrap(err, "read dir")
   395  	}
   396  	names := make([]string, 0, len(fis))
   397  	for _, fi := range fis {
   398  		names = append(names, fi.Name())
   399  	}
   400  	for _, n := range names {
   401  		if _, ok := block.IsBlockDir(n); !ok {
   402  			continue
   403  		}
   404  		dir := filepath.Join(s.dir, n)
   405  
   406  		fi, err := os.Stat(dir)
   407  		if err != nil {
   408  			return nil, errors.Wrapf(err, "stat block %v", dir)
   409  		}
   410  		if !fi.IsDir() {
   411  			continue
   412  		}
   413  		m, err := metadata.ReadFromDir(dir)
   414  		if err != nil {
   415  			return nil, errors.Wrapf(err, "read metadata for block %v", dir)
   416  		}
   417  		metas = append(metas, m)
   418  	}
   419  	sort.Slice(metas, func(i, j int) bool {
   420  		return metas[i].BlockMeta.MinTime < metas[j].BlockMeta.MinTime
   421  	})
   422  	return metas, nil
   423  }
   424  
   425  func hardlinkBlock(src, dst string) error {
   426  	chunkDir := filepath.Join(dst, block.ChunksDirname)
   427  
   428  	if err := os.MkdirAll(chunkDir, 0750); err != nil {
   429  		return errors.Wrap(err, "create chunks dir")
   430  	}
   431  
   432  	fis, err := os.ReadDir(filepath.Join(src, block.ChunksDirname))
   433  	if err != nil {
   434  		return errors.Wrap(err, "read chunk dir")
   435  	}
   436  	files := make([]string, 0, len(fis))
   437  	for _, fi := range fis {
   438  		files = append(files, fi.Name())
   439  	}
   440  	for i, fn := range files {
   441  		files[i] = filepath.Join(block.ChunksDirname, fn)
   442  	}
   443  	files = append(files, block.MetaFilename, block.IndexFilename)
   444  
   445  	for _, fn := range files {
   446  		if err := os.Link(filepath.Join(src, fn), filepath.Join(dst, fn)); err != nil {
   447  			return errors.Wrapf(err, "hard link file %s", fn)
   448  		}
   449  	}
   450  	return nil
   451  }
   452  
   453  // Meta defines the format of the thanos.shipper.json file that the shipper places in the data directory.
   454  type Meta struct {
   455  	Version  int         `json:"version"`
   456  	Uploaded []ulid.ULID `json:"uploaded"`
   457  }
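        // For illustration, a written thanos.shipper.json looks roughly like the
        // following (the ULIDs below are made up):
        //
        //	{
        //		"version": 1,
        //		"uploaded": [
        //			"01GZZZZZZZZZZZZZZZZZZZZZZZ",
        //			"01H0000000000000000000000Z"
        //		]
        //	}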
   458  
   459  const (
   460  	// MetaFilename is the known JSON filename for meta information.
   461  	MetaFilename = "thanos.shipper.json"
   462  
   463  	// MetaVersion1 represents version 1 of the meta file format.
   464  	MetaVersion1 = 1
   465  )
   466  
   467  // WriteMetaFile writes the given meta into <dir>/thanos.shipper.json.
   468  func WriteMetaFile(logger log.Logger, dir string, meta *Meta) error {
   469  	// Make any changes to the file appear atomic.
   470  	path := filepath.Join(dir, MetaFilename)
   471  	tmp := path + ".tmp"
   472  
   473  	f, err := os.Create(tmp)
   474  	if err != nil {
   475  		return err
   476  	}
   477  
   478  	enc := json.NewEncoder(f)
   479  	enc.SetIndent("", "\t")
   480  
   481  	if err := enc.Encode(meta); err != nil {
   482  		runutil.CloseWithLogOnErr(logger, f, "write meta file close")
   483  		return err
   484  	}
   485  	if err := f.Close(); err != nil {
   486  		return err
   487  	}
   488  	return renameFile(logger, tmp, path)
   489  }
   490  
   491  // ReadMetaFile reads the shipper meta from <dir>/thanos.shipper.json.
   492  func ReadMetaFile(dir string) (*Meta, error) {
   493  	fpath := filepath.Join(dir, filepath.Clean(MetaFilename))
   494  	b, err := os.ReadFile(fpath)
   495  	if err != nil {
   496  		return nil, errors.Wrapf(err, "failed to read %s", fpath)
   497  	}
   498  
   499  	var m Meta
   500  	if err := json.Unmarshal(b, &m); err != nil {
   501  		return nil, errors.Wrapf(err, "failed to parse %s as JSON: %q", fpath, string(b))
   502  	}
   503  	if m.Version != MetaVersion1 {
   504  		return nil, errors.Errorf("unexpected meta file version %d", m.Version)
   505  	}
   506  
   507  	return &m, nil
   508  }
   509  
   510  func renameFile(logger log.Logger, from, to string) error {
   511  	if err := os.RemoveAll(to); err != nil {
   512  		return err
   513  	}
   514  	if err := os.Rename(from, to); err != nil {
   515  		return err
   516  	}
   517  
   518  	// Directory was renamed; sync parent dir to persist rename.
   519  	pdir, err := fileutil.OpenDir(filepath.Dir(to))
   520  	if err != nil {
   521  		return err
   522  	}
   523  
   524  	if err = fileutil.Fdatasync(pdir); err != nil {
   525  		runutil.CloseWithLogOnErr(logger, pdir, "rename file dir close")
   526  		return err
   527  	}
   528  	return pdir.Close()
   529  }