github.com/thanos-io/thanos@v0.32.5/pkg/compact/downsample/streamed_block_writer.go (about)

     1  // Copyright (c) The Thanos Authors.
     2  // Licensed under the Apache License 2.0.
     3  
     4  package downsample
     5  
     6  import (
     7  	"context"
     8  	"io"
     9  	"path/filepath"
    10  	"strings"
    11  
    12  	"github.com/go-kit/log"
    13  	"github.com/go-kit/log/level"
    14  	"github.com/pkg/errors"
    15  	"github.com/prometheus/prometheus/model/labels"
    16  	"github.com/prometheus/prometheus/storage"
    17  	"github.com/prometheus/prometheus/tsdb"
    18  	"github.com/prometheus/prometheus/tsdb/chunks"
    19  	"github.com/prometheus/prometheus/tsdb/fileutil"
    20  	"github.com/prometheus/prometheus/tsdb/index"
    21  
    22  	"github.com/thanos-io/thanos/pkg/block"
    23  	"github.com/thanos-io/thanos/pkg/block/metadata"
    24  	"github.com/thanos-io/thanos/pkg/errutil"
    25  	"github.com/thanos-io/thanos/pkg/runutil"
    26  )
    27  
// streamedBlockWriter writes downsampled blocks to a new data block. Implemented to save memory consumption
// by writing chunks data right into the files, omitting keeping them in-memory. Index and meta data should be
// sealed afterwards, when there are no more series to process.
type streamedBlockWriter struct {
	blockDir       string
	finalized      bool // Set to true, if Close was called.
	logger         log.Logger
	ignoreFinalize bool // If true Close does not finalize block due to internal error.
	meta           metadata.Meta
	totalChunks    uint64 // Running total of chunks written via WriteSeries; persisted into meta stats on Close.
	totalSamples   uint64 // Running total of samples across all written chunks; persisted into meta stats on Close.

	chunkWriter tsdb.ChunkWriter
	indexWriter tsdb.IndexWriter
	indexReader tsdb.IndexReader
	closers     []io.Closer // Writers to close (which flushes/seals them) in Close; populated by NewStreamedBlockWriter.

	seriesRefs storage.SeriesRef // Next series reference to assign; incremented per series, so it doubles as the series count.
}
    47  
    48  // NewStreamedBlockWriter returns streamedBlockWriter instance, it's not concurrency safe.
    49  // Caller is responsible to Close all io.Closers by calling the Close when downsampling is done.
    50  // In case if error happens outside of the StreamedBlockWriter during the processing,
    51  // index and meta files will be written anyway, so the caller is always responsible for removing block directory with
    52  // a garbage on error.
    53  // This approach simplifies StreamedBlockWriter interface, which is a best trade-off taking into account the error is an
    54  // exception, not a general case.
    55  func NewStreamedBlockWriter(
    56  	blockDir string,
    57  	indexReader tsdb.IndexReader,
    58  	logger log.Logger,
    59  	originMeta metadata.Meta,
    60  ) (w *streamedBlockWriter, err error) {
    61  	closers := make([]io.Closer, 0, 2)
    62  
    63  	// We should close any opened Closer up to an error.
    64  	defer func() {
    65  		if err != nil {
    66  			var merr errutil.MultiError
    67  			merr.Add(err)
    68  			for _, cl := range closers {
    69  				merr.Add(cl.Close())
    70  			}
    71  			err = merr.Err()
    72  		}
    73  	}()
    74  
    75  	chunkWriter, err := chunks.NewWriter(filepath.Join(blockDir, block.ChunksDirname))
    76  	if err != nil {
    77  		return nil, errors.Wrap(err, "create chunk writer in streamedBlockWriter")
    78  	}
    79  	closers = append(closers, chunkWriter)
    80  
    81  	indexWriter, err := index.NewWriter(context.TODO(), filepath.Join(blockDir, block.IndexFilename))
    82  	if err != nil {
    83  		return nil, errors.Wrap(err, "open index writer in streamedBlockWriter")
    84  	}
    85  	closers = append(closers, indexWriter)
    86  
    87  	symbols := indexReader.Symbols()
    88  	for symbols.Next() {
    89  		if err = indexWriter.AddSymbol(symbols.At()); err != nil {
    90  			return nil, errors.Wrap(err, "add symbols")
    91  		}
    92  	}
    93  	if err := symbols.Err(); err != nil {
    94  		return nil, errors.Wrap(err, "read symbols")
    95  	}
    96  
    97  	return &streamedBlockWriter{
    98  		logger:      logger,
    99  		blockDir:    blockDir,
   100  		indexReader: indexReader,
   101  		indexWriter: indexWriter,
   102  		chunkWriter: chunkWriter,
   103  		meta:        originMeta,
   104  		closers:     closers,
   105  	}, nil
   106  }
   107  
   108  // WriteSeries writes chunks data to the chunkWriter, writes lset and chunks MetasFetcher to indexWrites and adds label sets to
   109  // labelsValues sets and memPostings to be written on the finalize state in the end of downsampling process.
   110  func (w *streamedBlockWriter) WriteSeries(lset labels.Labels, chunks []chunks.Meta) error {
   111  	if w.finalized || w.ignoreFinalize {
   112  		return errors.New("series can't be added, writers has been closed or internal error happened")
   113  	}
   114  
   115  	if len(chunks) == 0 {
   116  		level.Warn(w.logger).Log("msg", "empty chunks happened, skip series", "series", strings.ReplaceAll(lset.String(), "\"", "'"))
   117  		return nil
   118  	}
   119  
   120  	if err := w.chunkWriter.WriteChunks(chunks...); err != nil {
   121  		w.ignoreFinalize = true
   122  		return errors.Wrap(err, "add chunks")
   123  	}
   124  
   125  	if err := w.indexWriter.AddSeries(w.seriesRefs, lset, chunks...); err != nil {
   126  		w.ignoreFinalize = true
   127  		return errors.Wrap(err, "add series")
   128  	}
   129  
   130  	w.seriesRefs++
   131  
   132  	w.totalChunks += uint64(len(chunks))
   133  	for i := range chunks {
   134  		w.totalSamples += uint64(chunks[i].Chunk.NumSamples())
   135  	}
   136  
   137  	return nil
   138  }
   139  
   140  // Close calls finalizer to complete index and meta files and closes all io.CLoser writers.
   141  // Idempotent.
   142  func (w *streamedBlockWriter) Close() error {
   143  	if w.finalized {
   144  		return nil
   145  	}
   146  	w.finalized = true
   147  
   148  	merr := errutil.MultiError{}
   149  
   150  	if w.ignoreFinalize {
   151  		// Close open file descriptors anyway.
   152  		for _, cl := range w.closers {
   153  			merr.Add(cl.Close())
   154  		}
   155  		return merr.Err()
   156  	}
   157  
   158  	// Finalize saves prepared index and metadata to corresponding files.
   159  
   160  	for _, cl := range w.closers {
   161  		merr.Add(cl.Close())
   162  	}
   163  
   164  	if err := w.writeMetaFile(); err != nil {
   165  		return errors.Wrap(err, "write meta meta")
   166  	}
   167  
   168  	if err := w.syncDir(); err != nil {
   169  		return errors.Wrap(err, "sync blockDir")
   170  	}
   171  
   172  	if err := merr.Err(); err != nil {
   173  		return errors.Wrap(err, "finalize")
   174  	}
   175  
   176  	// No error, claim success.
   177  
   178  	level.Info(w.logger).Log(
   179  		"msg", "finalized downsampled block",
   180  		"mint", w.meta.MinTime,
   181  		"maxt", w.meta.MaxTime,
   182  		"ulid", w.meta.ULID,
   183  		"resolution", w.meta.Thanos.Downsample.Resolution,
   184  	)
   185  	return nil
   186  }
   187  
   188  // syncDir syncs blockDir on disk.
   189  func (w *streamedBlockWriter) syncDir() (err error) {
   190  	df, err := fileutil.OpenDir(w.blockDir)
   191  	if err != nil {
   192  		return errors.Wrap(err, "open temporary block blockDir")
   193  	}
   194  
   195  	defer runutil.CloseWithErrCapture(&err, df, "close temporary block blockDir")
   196  
   197  	if err := fileutil.Fdatasync(df); err != nil {
   198  		return errors.Wrap(err, "sync temporary blockDir")
   199  	}
   200  
   201  	return nil
   202  }
   203  
   204  // writeMetaFile writes meta file.
   205  func (w *streamedBlockWriter) writeMetaFile() error {
   206  	w.meta.Version = metadata.TSDBVersion1
   207  	w.meta.Thanos.Source = metadata.CompactorSource
   208  	w.meta.Thanos.SegmentFiles = block.GetSegmentFiles(w.blockDir)
   209  	w.meta.Stats.NumChunks = w.totalChunks
   210  	w.meta.Stats.NumSamples = w.totalSamples
   211  	w.meta.Stats.NumSeries = uint64(w.seriesRefs)
   212  
   213  	return w.meta.WriteToDir(w.logger, w.blockDir)
   214  }