github.com/thanos-io/thanos@v0.32.5/cmd/thanos/downsample.go

// Copyright (c) The Thanos Authors.
// Licensed under the Apache License 2.0.

package main

import (
	"context"
	"os"
	"path/filepath"
	"sort"
	"sync"
	"time"

	extflag "github.com/efficientgo/tools/extkingpin"
	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/oklog/run"
	"github.com/oklog/ulid"
	"github.com/pkg/errors"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
	"github.com/prometheus/prometheus/tsdb"
	"github.com/prometheus/prometheus/tsdb/chunkenc"

	"github.com/thanos-io/objstore"
	"github.com/thanos-io/objstore/client"
	objstoretracing "github.com/thanos-io/objstore/tracing/opentracing"

	"github.com/thanos-io/thanos/pkg/block"
	"github.com/thanos-io/thanos/pkg/block/metadata"
	"github.com/thanos-io/thanos/pkg/compact/downsample"
	"github.com/thanos-io/thanos/pkg/component"
	"github.com/thanos-io/thanos/pkg/errutil"
	"github.com/thanos-io/thanos/pkg/extprom"
	"github.com/thanos-io/thanos/pkg/prober"
	"github.com/thanos-io/thanos/pkg/runutil"
	httpserver "github.com/thanos-io/thanos/pkg/server/http"
)

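// DownsampleMetrics holds the Prometheus metrics that the downsample component
// tracks per compaction group: attempt and failure counters plus a duration
// histogram.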
type DownsampleMetrics struct {
	downsamples        *prometheus.CounterVec
	downsampleFailures *prometheus.CounterVec
	downsampleDuration *prometheus.HistogramVec
}

func newDownsampleMetrics(reg *prometheus.Registry) *DownsampleMetrics {
	m := new(DownsampleMetrics)

	m.downsamples = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
		Name: "thanos_compact_downsample_total",
		Help: "Total number of downsampling attempts.",
	}, []string{"group"})
	m.downsampleFailures = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
		Name: "thanos_compact_downsample_failures_total",
		Help: "Total number of failed downsampling attempts.",
	}, []string{"group"})
	m.downsampleDuration = promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{
		Name:    "thanos_compact_downsample_duration_seconds",
		Help:    "Duration of downsample runs.",
		Buckets: []float64{60, 300, 900, 1800, 3600, 7200, 14400}, // 1m, 5m, 15m, 30m, 60m, 120m, 240m
	}, []string{"group"})

	return m
}

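// RunDownsample registers the downsample component's actors on the given
// run.Group: a loop that periodically syncs block metadata from object storage
// and downsamples eligible blocks, plus an HTTP server exposing metrics and
// health/readiness probes.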
func RunDownsample(
	g *run.Group,
	logger log.Logger,
	reg *prometheus.Registry,
	httpBindAddr string,
	httpTLSConfig string,
	httpGracePeriod time.Duration,
	dataDir string,
	waitInterval time.Duration,
	downsampleConcurrency int,
	blockFilesConcurrency int,
	objStoreConfig *extflag.PathOrContent,
	comp component.Component,
	hashFunc metadata.HashFunc,
) error {
	confContentYaml, err := objStoreConfig.Content()
	if err != nil {
		return err
	}

	bkt, err := client.NewBucket(logger, confContentYaml, component.Downsample.String())
	if err != nil {
		return err
	}
	insBkt := objstoretracing.WrapWithTraces(objstore.WrapWithMetrics(bkt, extprom.WrapRegistererWithPrefix("thanos_", reg), bkt.Name()))

	// While fetching blocks, filter out blocks that were marked for no downsample.
	metaFetcher, err := block.NewMetaFetcher(logger, block.FetcherConcurrency, insBkt, "", extprom.WrapRegistererWithPrefix("thanos_", reg), []block.MetadataFilter{
		block.NewDeduplicateFilter(block.FetcherConcurrency),
		downsample.NewGatherNoDownsampleMarkFilter(logger, insBkt),
	})
	if err != nil {
		return errors.Wrap(err, "create meta fetcher")
	}

	// Ensure we close up everything properly.
	defer func() {
		if err != nil {
			runutil.CloseWithLogOnErr(logger, insBkt, "bucket client")
		}
	}()

	httpProbe := prober.NewHTTP()
	statusProber := prober.Combine(
		httpProbe,
		prober.NewInstrumentation(comp, logger, extprom.WrapRegistererWithPrefix("thanos_", reg)),
	)

	metrics := newDownsampleMetrics(reg)
	// Start the cycle of syncing blocks from the bucket and downsampling them.
	{
		ctx, cancel := context.WithCancel(context.Background())

		g.Add(func() error {
			defer runutil.CloseWithLogOnErr(logger, insBkt, "bucket client")
			statusProber.Ready()

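			// runutil.Repeat runs the function once immediately and then every
			// waitInterval until the context is cancelled. Each iteration does
			// two passes: the first downsamples raw blocks to 5m resolution,
			// and the second re-fetches metadata so that freshly produced 5m
			// blocks can be downsampled to 1h within the same iteration rather
			// than waiting for the next interval.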
			return runutil.Repeat(waitInterval, ctx.Done(), func() error {
				level.Info(logger).Log("msg", "start first pass of downsampling")
				metas, _, err := metaFetcher.Fetch(ctx)
				if err != nil {
					return errors.Wrap(err, "sync before first pass of downsampling")
				}

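				// Touch the per-group counter children without incrementing
				// them, so that zero-valued series exist before the first
				// attempt is recorded.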
				for _, meta := range metas {
					groupKey := meta.Thanos.GroupKey()
					metrics.downsamples.WithLabelValues(groupKey)
					metrics.downsampleFailures.WithLabelValues(groupKey)
				}
				if err := downsampleBucket(ctx, logger, metrics, insBkt, metas, dataDir, downsampleConcurrency, blockFilesConcurrency, hashFunc, false); err != nil {
					return errors.Wrap(err, "downsampling failed")
				}

				level.Info(logger).Log("msg", "start second pass of downsampling")
				metas, _, err = metaFetcher.Fetch(ctx)
				if err != nil {
					return errors.Wrap(err, "sync before second pass of downsampling")
				}
				if err := downsampleBucket(ctx, logger, metrics, insBkt, metas, dataDir, downsampleConcurrency, blockFilesConcurrency, hashFunc, false); err != nil {
					return errors.Wrap(err, "downsampling failed")
				}
				return nil
			})
		}, func(error) {
			cancel()
		})
	}

	srv := httpserver.New(logger, reg, comp, httpProbe,
		httpserver.WithListen(httpBindAddr),
		httpserver.WithGracePeriod(httpGracePeriod),
		httpserver.WithTLSConfig(httpTLSConfig),
	)

	g.Add(func() error {
		statusProber.Healthy()

		return srv.ListenAndServe()
	}, func(err error) {
		statusProber.NotReady(err)
		defer statusProber.NotHealthy(err)

		srv.Shutdown(err)
	})

	level.Info(logger).Log("msg", "starting downsample node")
	return nil
}

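// downsampleBucket downsamples all eligible blocks in metas: raw blocks that
// span at least the 5m downsample range and 5m blocks that span at least the
// 1h downsample range, skipping blocks whose sources are already covered by an
// existing downsampled block. Work is spread over downsampleConcurrency
// workers.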
func downsampleBucket(
	ctx context.Context,
	logger log.Logger,
	metrics *DownsampleMetrics,
	bkt objstore.Bucket,
	metas map[ulid.ULID]*metadata.Meta,
	dir string,
	downsampleConcurrency int,
	blockFilesConcurrency int,
	hashFunc metadata.HashFunc,
	acceptMalformedIndex bool,
) (rerr error) {
	if err := os.MkdirAll(dir, 0750); err != nil {
		return errors.Wrap(err, "create dir")
	}

	defer func() {
		// On error, leave the downsample directory in place for inspection:
		// it aids debugging halt errors, and in other cases it means we may
		// not have to download everything again on the next attempt.
		if rerr != nil {
			return
		}
		if err := os.RemoveAll(dir); err != nil {
			level.Error(logger).Log("msg", "failed to remove downsample cache directory", "path", dir, "err", err)
		}
	}()

	// Sets of source block IDs already covered by an existing downsampled
	// block. We don't need to downsample a block if all of its sources are
	// already present in the set for the target resolution.
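	// For example: a raw block whose compaction sources are {A, B} is skipped
	// for 5m downsampling once both A and B appear among the sources of
	// existing 5m blocks.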
	sources5m := map[ulid.ULID]struct{}{}
	sources1h := map[ulid.ULID]struct{}{}

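	// Resolution levels are defined in pkg/compact/downsample; at the time of
	// writing, ResLevel0 is raw data, ResLevel1 is 5m and ResLevel2 is 1h
	// (all expressed in milliseconds).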
	for _, m := range metas {
		switch m.Thanos.Downsample.Resolution {
		case downsample.ResLevel0:
			continue
		case downsample.ResLevel1:
			for _, id := range m.Compaction.Sources {
				sources5m[id] = struct{}{}
			}
		case downsample.ResLevel2:
			for _, id := range m.Compaction.Sources {
				sources1h[id] = struct{}{}
			}
		default:
			return errors.Errorf("unexpected downsampling resolution %d", m.Thanos.Downsample.Resolution)
		}
	}

	ignoreDirs := []string{}
	for id := range metas {
		ignoreDirs = append(ignoreDirs, id.String())
	}

	if err := runutil.DeleteAll(dir, ignoreDirs...); err != nil {
		level.Warn(logger).Log("msg", "failed deleting potentially outdated directories/files, some disk space usage might have leaked. Continuing", "err", err, "dir", dir)
	}

	metasULIDS := make([]ulid.ULID, 0, len(metas))
	for k := range metas {
		metasULIDS = append(metasULIDS, k)
	}
	sort.Slice(metasULIDS, func(i, j int) bool {
		return metasULIDS[i].Compare(metasULIDS[j]) < 0
	})

	var (
		wg                      sync.WaitGroup
		metaCh                  = make(chan *metadata.Meta)
		downsampleErrs          errutil.MultiError
		errCh                   = make(chan error, downsampleConcurrency)
		workerCtx, workerCancel = context.WithCancel(ctx)
	)

	defer workerCancel()

	level.Debug(logger).Log("msg", "downsampling bucket", "concurrency", downsampleConcurrency)
	for i := 0; i < downsampleConcurrency; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for m := range metaCh {
				resolution := downsample.ResLevel1
				errMsg := "downsampling to 5 min"
				if m.Thanos.Downsample.Resolution == downsample.ResLevel1 {
					resolution = downsample.ResLevel2
					errMsg = "downsampling to 60 min"
				}
				if err := processDownsampling(workerCtx, logger, bkt, m, dir, resolution, hashFunc, metrics, acceptMalformedIndex, blockFilesConcurrency); err != nil {
					metrics.downsampleFailures.WithLabelValues(m.Thanos.GroupKey()).Inc()
					errCh <- errors.Wrap(err, errMsg)
				}
				metrics.downsamples.WithLabelValues(m.Thanos.GroupKey()).Inc()
			}
		}()
	}

	// Workers scheduled, distribute blocks.
metaSendLoop:
	for _, mk := range metasULIDS {
		m := metas[mk]

		switch m.Thanos.Downsample.Resolution {
		case downsample.ResLevel2:
			continue

		case downsample.ResLevel0:
			missing := false
			for _, id := range m.Compaction.Sources {
				if _, ok := sources5m[id]; !ok {
					missing = true
					break
				}
			}
			if !missing {
				continue
			}
			// Only downsample a block once we are sure to get roughly 2 chunks out of it.
			// NOTE(fabxc): this must match the block size at which the compactor creates
			// downsampled blocks. Otherwise we may never downsample some data.
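			// At the time of writing, downsample.ResLevel1DownsampleRange is
			// 40h (in milliseconds), so raw blocks spanning less than that are
			// skipped for now.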
			if m.MaxTime-m.MinTime < downsample.ResLevel1DownsampleRange {
				continue
			}

		case downsample.ResLevel1:
			missing := false
			for _, id := range m.Compaction.Sources {
				if _, ok := sources1h[id]; !ok {
					missing = true
					break
				}
			}
			if !missing {
				continue
			}
			// Only downsample a block once we are sure to get roughly 2 chunks out of it.
			// NOTE(fabxc): this must match the block size at which the compactor creates
			// downsampled blocks. Otherwise we may never downsample some data.
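			// At the time of writing, downsample.ResLevel2DownsampleRange is
			// 10 days (in milliseconds), so shorter 5m blocks are skipped for
			// now.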
			if m.MaxTime-m.MinTime < downsample.ResLevel2DownsampleRange {
				continue
			}
		}

		select {
		case <-workerCtx.Done():
			downsampleErrs.Add(workerCtx.Err())
			break metaSendLoop
		case metaCh <- m:
		case downsampleErr := <-errCh:
			downsampleErrs.Add(downsampleErr)
			break metaSendLoop
		}
	}

	close(metaCh)
	wg.Wait()
	workerCancel()
	close(errCh)

	// Collect any other error reported by the workers.
	for downsampleErr := range errCh {
		downsampleErrs.Add(downsampleErr)
	}

	return downsampleErrs.Err()
}

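// processDownsampling downloads a single block, verifies its index, downsamples
// it to the given resolution, records index stats in the resulting meta file,
// uploads the new block, and finally removes the local copies.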
func processDownsampling(
	ctx context.Context,
	logger log.Logger,
	bkt objstore.Bucket,
	m *metadata.Meta,
	dir string,
	resolution int64,
	hashFunc metadata.HashFunc,
	metrics *DownsampleMetrics,
	acceptMalformedIndex bool,
	blockFilesConcurrency int,
) error {
	begin := time.Now()
	bdir := filepath.Join(dir, m.ULID.String())

	err := block.Download(ctx, logger, bkt, m.ULID, bdir, objstore.WithFetchConcurrency(blockFilesConcurrency))
	if err != nil {
		return errors.Wrapf(err, "download block %s", m.ULID)
	}
	level.Info(logger).Log("msg", "downloaded block", "id", m.ULID, "duration", time.Since(begin), "duration_ms", time.Since(begin).Milliseconds())

	if err := block.VerifyIndex(logger, filepath.Join(bdir, block.IndexFilename), m.MinTime, m.MaxTime); err != nil && !acceptMalformedIndex {
		return errors.Wrap(err, "input block index not valid")
	}

	begin = time.Now()

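	// Raw blocks hold standard XOR-encoded chunks, so the upstream TSDB chunk
	// pool suffices; already-downsampled blocks use Thanos' aggregate chunk
	// encoding, which the pool from downsample.NewPool can decode.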
	var pool chunkenc.Pool
	if m.Thanos.Downsample.Resolution == 0 {
		pool = chunkenc.NewPool()
	} else {
		pool = downsample.NewPool()
	}

	b, err := tsdb.OpenBlock(logger, bdir, pool)
	if err != nil {
		return errors.Wrapf(err, "open block %s", m.ULID)
	}
	defer runutil.CloseWithLogOnErr(log.With(logger, "outcome", "potentially leaked mmap file handles"), b, "tsdb reader")

	id, err := downsample.Downsample(logger, m, b, dir, resolution)
	if err != nil {
		return errors.Wrapf(err, "downsample block %s to window %d", m.ULID, resolution)
	}
	resdir := filepath.Join(dir, id.String())

	downsampleDuration := time.Since(begin)
	level.Info(logger).Log("msg", "downsampled block",
		"from", m.ULID, "to", id, "duration", downsampleDuration, "duration_ms", downsampleDuration.Milliseconds())
	metrics.downsampleDuration.WithLabelValues(m.Thanos.GroupKey()).Observe(downsampleDuration.Seconds())

	stats, err := block.GatherIndexHealthStats(logger, filepath.Join(resdir, block.IndexFilename), m.MinTime, m.MaxTime)
	if err == nil {
		err = stats.AnyErr()
	}
	if err != nil && !acceptMalformedIndex {
		return errors.Wrap(err, "output block index not valid")
	}

	meta, err := metadata.ReadFromDir(resdir)
	if err != nil {
		return errors.Wrap(err, "read meta")
	}

	if stats.ChunkMaxSize > 0 {
		meta.Thanos.IndexStats.ChunkMaxSize = stats.ChunkMaxSize
	}
	if stats.SeriesMaxSize > 0 {
		meta.Thanos.IndexStats.SeriesMaxSize = stats.SeriesMaxSize
	}
	if err := meta.WriteToDir(logger, resdir); err != nil {
		return errors.Wrap(err, "write meta")
	}

	begin = time.Now()

	err = block.Upload(ctx, logger, bkt, resdir, hashFunc)
	if err != nil {
		return errors.Wrapf(err, "upload downsampled block %s", id)
	}

	level.Info(logger).Log("msg", "uploaded block", "id", id, "duration", time.Since(begin), "duration_ms", time.Since(begin).Milliseconds())

	// It is not harmful if these fail.
	if err := os.RemoveAll(bdir); err != nil {
		level.Warn(logger).Log("msg", "failed to clean directory", "dir", bdir, "err", err)
	}
	if err := os.RemoveAll(resdir); err != nil {
		level.Warn(logger).Log("msg", "failed to clean directory", "dir", resdir, "err", err)
	}

	return nil
}
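
// A minimal sketch of driving downsampleBucket directly (hypothetical,
// test-style; not part of the original file). An in-memory bucket stands in
// for real object storage, and metas would normally come from a
// block.MetaFetcher as in RunDownsample above:
//
//	ctx := context.Background()
//	logger := log.NewNopLogger()
//	bkt := objstore.NewInMemBucket()
//	metrics := newDownsampleMetrics(prometheus.NewRegistry())
//	metas := map[ulid.ULID]*metadata.Meta{} // normally filled by a fetcher.
//	dir := filepath.Join(os.TempDir(), "downsample-example")
//	if err := downsampleBucket(ctx, logger, metrics, bkt, metas, dir, 1, 1, metadata.NoneFunc, false); err != nil {
//		level.Error(logger).Log("err", err)
//	}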