github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/block/metadata.go (about)

     1  package block
     2  
     3  import (
     4  	"crypto/rand"
     5  	"encoding/json"
     6  	"fmt"
     7  	"io"
     8  	"math"
     9  	"os"
    10  	"path/filepath"
    11  	"strings"
    12  	"time"
    13  
    14  	"github.com/go-kit/log"
    15  	"github.com/go-kit/log/level"
    16  	"github.com/grafana/dskit/multierror"
    17  	"github.com/grafana/dskit/runutil"
    18  	"github.com/oklog/ulid/v2"
    19  	"github.com/pkg/errors"
    20  	"github.com/prometheus/common/model"
    21  	"github.com/prometheus/prometheus/tsdb"
    22  	"github.com/prometheus/prometheus/tsdb/fileutil"
    23  
    24  	ingestv1 "github.com/grafana/pyroscope/api/gen/proto/go/ingester/v1"
    25  	typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1"
    26  )
    27  
    28  const (
    29  	UnknownSource   SourceType = ""
    30  	IngesterSource  SourceType = "ingester"
    31  	CompactorSource SourceType = "compactor"
    32  )
    33  
    34  const (
    35  	MetaFilename = "meta.json"
    36  )
    37  
    38  type SourceType string
    39  
    40  type MetaVersion int
    41  
    42  const (
    43  	// Version1 is a enumeration of Pyroscope section of TSDB meta supported by Pyroscope.
    44  	MetaVersion1 = MetaVersion(1)
    45  
    46  	// MetaVersion2 indicates the block format version.
    47  	// https://github.com/grafana/phlare/pull/767.
    48  	//  1. In this version we introduced symdb:
    49  	//     - stacktraces.parquet table has been deprecated.
    50  	//     - StacktracePartition column added to profiles.parquet table.
    51  	//     - symdb is stored in ./symbols sub-directory.
    52  	//  2. TotalValue column added to profiles.parquet table.
    53  	//  3. pprof labels discarded and never stored in the block.
    54  	MetaVersion2 = MetaVersion(2)
    55  
    56  	// MetaVersion3 indicates the block format version.
    57  	// https://github.com/grafana/pyroscope/pull/2196.
    58  	//  1. Introduction of symdb v2:
    59  	//     - locations, functions, mappings, strings parquet tables
    60  	//       moved to ./symbols sub-directory (symdb) and partitioned
    61  	//       by StacktracePartition. References to the partitions
    62  	//       are stored in the index.symdb file.
    63  	//  2. In this version, parquet tables are never loaded into
    64  	//     memory entirely. Instead, each partition (row range) is read
    65  	//     from the block on demand at query time.
    66  	MetaVersion3 = MetaVersion(3)
    67  )
    68  
    69  // IsValid returns true if the version is valid.
    70  func (v MetaVersion) IsValid() bool {
    71  	switch v {
    72  	case MetaVersion1, MetaVersion2, MetaVersion3:
    73  		return true
    74  	default:
    75  		return false
    76  	}
    77  }
    78  
    79  type BlockStats struct {
    80  	NumSamples  uint64 `json:"numSamples,omitempty"`
    81  	NumSeries   uint64 `json:"numSeries,omitempty"`
    82  	NumProfiles uint64 `json:"numProfiles,omitempty"`
    83  }
    84  
    85  type File struct {
    86  	RelPath string `json:"relPath"`
    87  	// SizeBytes is optional (e.g meta.json does not show size).
    88  	SizeBytes uint64 `json:"sizeBytes,omitempty"`
    89  
    90  	// Parquet can contain some optional Parquet file info
    91  	Parquet *ParquetFile `json:"parquet,omitempty"`
    92  	// TSDB can contain some optional TSDB file info
    93  	TSDB *TSDBFile `json:"tsdb,omitempty"`
    94  }
    95  
    96  type ParquetFile struct {
    97  	NumRowGroups uint64 `json:"numRowGroups,omitempty"`
    98  	NumRows      uint64 `json:"numRows,omitempty"`
    99  }
   100  
   101  type TSDBFile struct {
   102  	NumSeries uint64 `json:"numSeries,omitempty"`
   103  }
   104  
   105  // BlockDesc describes a block by ULID and time range.
   106  type BlockDesc struct {
   107  	ULID    ulid.ULID  `json:"ulid"`
   108  	MinTime model.Time `json:"minTime"`
   109  	MaxTime model.Time `json:"maxTime"`
   110  }
   111  
   112  type MetaStats struct {
   113  	BlockStats
   114  	FileStats      []FileStats
   115  	TotalSizeBytes uint64
   116  }
   117  
   118  type FileStats struct {
   119  	RelPath   string
   120  	SizeBytes uint64
   121  }
   122  
   123  // BlockMetaCompaction holds information about compactions a block went through.
   124  type BlockMetaCompaction struct {
   125  	// Maximum number of compaction cycles any source block has
   126  	// gone through.
   127  	Level int `json:"level"`
   128  	// ULIDs of all source head blocks that went into the block.
   129  	Sources []ulid.ULID `json:"sources,omitempty"`
   130  	// Indicates that during compaction it resulted in a block without any samples
   131  	// so it should be deleted on the next reloadBlocks.
   132  	Deletable bool `json:"deletable,omitempty"`
   133  	// Short descriptions of the direct blocks that were used to create
   134  	// this block.
   135  	Parents []BlockDesc `json:"parents,omitempty"`
   136  	Failed  bool        `json:"failed,omitempty"`
   137  	// Additional information about the compaction, for example, block created from out-of-order chunks.
   138  	Hints []string `json:"hints,omitempty"`
   139  }
   140  
   141  type Meta struct {
   142  	// Unique identifier for the block and its contents. Changes on compaction.
   143  	ULID ulid.ULID `json:"ulid"`
   144  
   145  	// MinTime and MaxTime specify the time range all samples
   146  	// in the block are in.
   147  	MinTime model.Time `json:"minTime"`
   148  	MaxTime model.Time `json:"maxTime"`
   149  
   150  	// Stats about the contents of the block.
   151  	Stats BlockStats `json:"stats,omitempty"`
   152  
   153  	// File is a sorted (by rel path) list of all files in block directory of this block known to PyroscopeDB.
   154  	// Sorted by relative path.
   155  	Files []File `json:"files,omitempty"`
   156  
   157  	// Information on compactions the block was created from.
   158  	Compaction BlockMetaCompaction `json:"compaction"`
   159  
   160  	// Version of the index format.
   161  	Version MetaVersion `json:"version"`
   162  
   163  	// Labels are the external labels identifying the producer as well as tenant.
   164  	Labels map[string]string `json:"labels"`
   165  
   166  	// Source is a real upload source of the block.
   167  	Source SourceType `json:"source,omitempty"`
   168  
   169  	// Downsample is a downsampling resolution of the block. 0 means no downsampling.
   170  	Downsample `json:"downsample"`
   171  }
   172  
   173  type Downsample struct {
   174  	Resolution int64 `json:"resolution"`
   175  }
   176  
   177  func (m *Meta) FileByRelPath(name string) *File {
   178  	for _, f := range m.Files {
   179  		if f.RelPath == name {
   180  			return &f
   181  		}
   182  	}
   183  	return nil
   184  }
   185  
   186  func (m *Meta) InRange(start, end model.Time) bool {
   187  	return InRange(m.MinTime, m.MaxTime, start, end)
   188  }
   189  
   190  func (m *Meta) String() string {
   191  	return fmt.Sprintf(
   192  		"%s (min time: %s, max time: %s)",
   193  		m.ULID,
   194  		m.MinTime.Time().UTC().Format(time.RFC3339Nano),
   195  		m.MaxTime.Time().UTC().Format(time.RFC3339Nano),
   196  	)
   197  }
   198  
   199  func (m *Meta) Clone() *Meta {
   200  	data, err := json.Marshal(m)
   201  	if err != nil {
   202  		panic(err)
   203  	}
   204  	var clone Meta
   205  	if err := json.Unmarshal(data, &clone); err != nil {
   206  		panic(err)
   207  	}
   208  	return &clone
   209  }
   210  func (m *Meta) BlockInfo() *typesv1.BlockInfo {
   211  	info := &typesv1.BlockInfo{}
   212  	m.WriteBlockInfo(info)
   213  	return info
   214  }
   215  
   216  func (m *Meta) WriteBlockInfo(info *typesv1.BlockInfo) {
   217  	info.Ulid = m.ULID.String()
   218  	info.MinTime = int64(m.MinTime)
   219  	info.MaxTime = int64(m.MaxTime)
   220  	if info.Compaction == nil {
   221  		info.Compaction = &typesv1.BlockCompaction{}
   222  	}
   223  	info.Compaction.Level = int32(m.Compaction.Level)
   224  	info.Compaction.Parents = make([]string, len(m.Compaction.Parents))
   225  	for i, p := range m.Compaction.Parents {
   226  		info.Compaction.Parents[i] = p.ULID.String()
   227  	}
   228  	info.Compaction.Sources = make([]string, len(m.Compaction.Sources))
   229  	for i, s := range m.Compaction.Sources {
   230  		info.Compaction.Sources[i] = s.String()
   231  	}
   232  	info.Labels = make([]*typesv1.LabelPair, 0, len(m.Labels))
   233  	for k, v := range m.Labels {
   234  		info.Labels = append(info.Labels, &typesv1.LabelPair{
   235  			Name:  k,
   236  			Value: v,
   237  		})
   238  	}
   239  }
   240  
   241  func generateULID() ulid.ULID {
   242  	return ulid.MustNew(ulid.Timestamp(time.Now()), rand.Reader)
   243  }
   244  
   245  func NewMeta() *Meta {
   246  	return &Meta{
   247  		ULID: generateULID(),
   248  
   249  		MinTime: math.MaxInt64,
   250  		MaxTime: 0,
   251  		Labels:  make(map[string]string),
   252  		Version: MetaVersion3,
   253  	}
   254  }
   255  
   256  func MetaFromDir(dir string) (*Meta, int64, error) {
   257  	b, err := os.ReadFile(filepath.Join(dir, MetaFilename))
   258  	if err != nil {
   259  		return nil, 0, err
   260  	}
   261  	var m Meta
   262  
   263  	if err := json.Unmarshal(b, &m); err != nil {
   264  		return nil, 0, err
   265  	}
   266  	switch m.Version {
   267  	case MetaVersion1:
   268  	case MetaVersion2:
   269  	case MetaVersion3:
   270  	default:
   271  		return nil, 0, errors.Errorf("unexpected meta file version %d", m.Version)
   272  	}
   273  
   274  	return &m, int64(len(b)), nil
   275  }
   276  
   277  type wrappedWriter struct {
   278  	w io.Writer
   279  	n int
   280  }
   281  
   282  func (w *wrappedWriter) Write(p []byte) (n int, err error) {
   283  	n, err = w.w.Write(p)
   284  	if err != nil {
   285  		return 0, err
   286  	}
   287  	w.n += n
   288  	return n, nil
   289  }
   290  
   291  func (meta *Meta) WriteTo(w io.Writer) (int64, error) {
   292  	wrapped := &wrappedWriter{
   293  		w: w,
   294  	}
   295  	enc := json.NewEncoder(wrapped)
   296  	enc.SetIndent("", "\t")
   297  	return int64(wrapped.n), enc.Encode(meta)
   298  }
   299  
   300  // WriteToFile writes the encoded meta into <dir>/meta.json.
   301  func (meta *Meta) WriteToFile(logger log.Logger, dir string) (int64, error) {
   302  	// Make any changes to the file appear atomic.
   303  	path := filepath.Join(dir, MetaFilename)
   304  	tmp := path + ".tmp"
   305  	defer func() {
   306  		if err := os.RemoveAll(tmp); err != nil {
   307  			level.Error(logger).Log("msg", "remove tmp file", "err", err.Error())
   308  		}
   309  	}()
   310  
   311  	f, err := os.Create(tmp)
   312  	if err != nil {
   313  		return 0, err
   314  	}
   315  
   316  	jsonMeta, err := json.MarshalIndent(meta, "", "\t")
   317  	if err != nil {
   318  		return 0, err
   319  	}
   320  
   321  	n, err := f.Write(jsonMeta)
   322  	if err != nil {
   323  		return 0, multierror.New(err, f.Close()).Err()
   324  	}
   325  
   326  	// Force the kernel to persist the file on disk to avoid data loss if the host crashes.
   327  	if err := f.Sync(); err != nil {
   328  		return 0, multierror.New(err, f.Close()).Err()
   329  	}
   330  	if err := f.Close(); err != nil {
   331  		return 0, err
   332  	}
   333  	return int64(n), fileutil.Replace(tmp, path)
   334  }
   335  
   336  func (meta *Meta) TSDBBlockMeta() tsdb.BlockMeta {
   337  	return tsdb.BlockMeta{
   338  		ULID:    meta.ULID,
   339  		MinTime: int64(meta.MinTime),
   340  		MaxTime: int64(meta.MaxTime),
   341  	}
   342  }
   343  
   344  func (meta *Meta) GetStats() MetaStats {
   345  	fileStats := make([]FileStats, 0, len(meta.Files))
   346  	totalSizeBytes := uint64(0)
   347  	for _, file := range meta.Files {
   348  		fileStats = append(fileStats, FileStats{
   349  			RelPath:   file.RelPath,
   350  			SizeBytes: file.SizeBytes,
   351  		})
   352  		totalSizeBytes += file.SizeBytes
   353  	}
   354  
   355  	return MetaStats{
   356  		BlockStats:     meta.Stats,
   357  		FileStats:      fileStats,
   358  		TotalSizeBytes: totalSizeBytes,
   359  	}
   360  }
   361  
   362  func (stats MetaStats) ConvertToBlockStats() *ingestv1.BlockStats {
   363  	indexBytes := uint64(0)
   364  	profileBytes := uint64(0)
   365  	symbolBytes := uint64(0)
   366  	for _, f := range stats.FileStats {
   367  		if f.RelPath == IndexFilename {
   368  			indexBytes = f.SizeBytes
   369  		} else if f.RelPath == "profiles.parquet" {
   370  			profileBytes += f.SizeBytes
   371  		} else if strings.HasPrefix(f.RelPath, "symbols") || filepath.Ext(f.RelPath) == ".symdb" {
   372  			symbolBytes += f.SizeBytes
   373  		}
   374  	}
   375  	blockStats := &ingestv1.BlockStats{
   376  		SeriesCount:  stats.NumSeries,
   377  		ProfileCount: stats.NumProfiles,
   378  		SampleCount:  stats.NumSamples,
   379  		IndexBytes:   indexBytes,
   380  		ProfileBytes: profileBytes,
   381  		SymbolBytes:  symbolBytes,
   382  	}
   383  	return blockStats
   384  }
   385  
   386  // ReadMetaFromDir reads the given meta from <dir>/meta.json.
   387  func ReadMetaFromDir(dir string) (*Meta, error) {
   388  	f, err := os.Open(filepath.Join(dir, filepath.Clean(MetaFilename)))
   389  	if err != nil {
   390  		return nil, err
   391  	}
   392  	return Read(f)
   393  }
   394  
   395  func exhaustCloseWithErrCapture(err *error, r io.ReadCloser, msg string) {
   396  	_, copyErr := io.Copy(io.Discard, r)
   397  
   398  	runutil.CloseWithErrCapture(err, r, "%s", msg)
   399  
   400  	// Prepend the io.Copy error.
   401  	merr := multierror.MultiError{}
   402  	merr.Add(copyErr)
   403  	merr.Add(*err)
   404  
   405  	*err = merr.Err()
   406  }
   407  
   408  // Read the block meta from the given reader.
   409  func Read(rc io.ReadCloser) (_ *Meta, err error) {
   410  	defer exhaustCloseWithErrCapture(&err, rc, "close meta JSON")
   411  
   412  	var m Meta
   413  	if err = json.NewDecoder(rc).Decode(&m); err != nil {
   414  		return nil, err
   415  	}
   416  
   417  	switch m.Version {
   418  	case MetaVersion1:
   419  	case MetaVersion2:
   420  	case MetaVersion3:
   421  	default:
   422  		return nil, errors.Errorf("unexpected meta file version %d", m.Version)
   423  	}
   424  
   425  	return &m, nil
   426  }
   427  
   428  func InRange(min, max, start, end model.Time) bool {
   429  	if start > max {
   430  		return false
   431  	}
   432  	if end < min {
   433  		return false
   434  	}
   435  	return true
   436  }