github.com/grafana/pyroscope@v1.18.0/pkg/block/object.go (about)

     1  package block
     2  
     3  import (
     4  	"context"
     5  	"encoding/binary"
     6  	"fmt"
     7  	"path/filepath"
     8  	"strconv"
     9  	"strings"
    10  
    11  	"github.com/grafana/dskit/multierror"
    12  	"github.com/oklog/ulid/v2"
    13  	"github.com/pkg/errors"
    14  	"golang.org/x/sync/errgroup"
    15  
    16  	metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1"
    17  	"github.com/grafana/pyroscope/pkg/block/metadata"
    18  	"github.com/grafana/pyroscope/pkg/objstore"
    19  	"github.com/grafana/pyroscope/pkg/util"
    20  	"github.com/grafana/pyroscope/pkg/util/bufferpool"
    21  	"github.com/grafana/pyroscope/pkg/util/refctr"
    22  )
    23  
    24  // TODO Next:
    25  //  - Separate storages for segments and compacted blocks.
    26  //  - Local cache? Useful for all-in-one deployments.
    27  //  - Distributed cache.
    28  
    29  // Object represents a block or a segment in the object storage.
    30  type Object struct {
    31  	path    string
    32  	meta    *metastorev1.BlockMeta
    33  	storage objstore.BucketReader
    34  	local   *objstore.ReadOnlyFile
    35  
    36  	refs refctr.Counter
    37  	buf  *bufferpool.Buffer
    38  	err  error
    39  
    40  	memSize     int
    41  	downloadDir string
    42  }
    43  
    44  type ObjectOption func(*Object)
    45  
    46  func WithObjectPath(path string) ObjectOption {
    47  	return func(obj *Object) {
    48  		obj.path = path
    49  	}
    50  }
    51  
    52  func WithObjectMaxSizeLoadInMemory(size int) ObjectOption {
    53  	return func(obj *Object) {
    54  		obj.memSize = size
    55  	}
    56  }
    57  
    58  func WithObjectDownload(dir string) ObjectOption {
    59  	return func(obj *Object) {
    60  		obj.downloadDir = dir
    61  	}
    62  }
    63  
    64  func NewObjectFromPath(ctx context.Context, storage objstore.Bucket, path string, opts ...ObjectOption) (*Object, error) {
    65  	attrs, err := storage.Attributes(ctx, path)
    66  	if err != nil {
    67  		return nil, err
    68  	}
    69  
    70  	defaultSize := int64(1 << 8) // 18)
    71  	offset := attrs.Size - defaultSize
    72  	if offset < 0 {
    73  		offset = 0
    74  	}
    75  	size := attrs.Size - offset
    76  
    77  	buf := bufferpool.GetBuffer(int(size))
    78  	if err := objstore.ReadRange(ctx, buf, path, storage, offset, size); err != nil {
    79  		return nil, err
    80  	}
    81  	if size < 8 {
    82  		return nil, errors.New("invalid object too small")
    83  	}
    84  
    85  	metaSize := int64(binary.BigEndian.Uint32(buf.B[len(buf.B)-8:len(buf.B)-4])) + 8
    86  	if metaSize > size {
    87  		offset = attrs.Size - metaSize
    88  
    89  		bufNew := bufferpool.GetBuffer(int(metaSize))
    90  		if err := objstore.ReadRange(ctx, bufNew, path, storage, offset, metaSize-size); err != nil {
    91  			return nil, err
    92  		}
    93  		bufNew.B = append(bufNew.B, buf.B...)
    94  		buf = bufNew
    95  
    96  	}
    97  
    98  	var meta metastorev1.BlockMeta
    99  	if err := metadata.Decode(buf.B, &meta); err != nil {
   100  		return nil, err
   101  	}
   102  	meta.Size = uint64(attrs.Size)
   103  
   104  	opts = append(opts, WithObjectPath(path))
   105  	return NewObject(storage, &meta, opts...), nil
   106  }
   107  
   108  func NewObject(storage objstore.Bucket, md *metastorev1.BlockMeta, opts ...ObjectOption) *Object {
   109  	o := &Object{
   110  		storage: storage,
   111  		meta:    md,
   112  		path:    ObjectPath(md),
   113  		memSize: defaultObjectSizeLoadInMemory,
   114  	}
   115  	for _, opt := range opts {
   116  		opt(o)
   117  	}
   118  	return o
   119  }
   120  
   121  func ObjectPath(md *metastorev1.BlockMeta) string {
   122  	return BuildObjectPath(metadata.Tenant(md), md.Shard, md.CompactionLevel, md.Id)
   123  }
   124  
   125  func BuildObjectDir(tenant string, shard uint32) string {
   126  	topLevel := DirNameBlock
   127  	tenantDirName := tenant
   128  	if tenant == "" {
   129  		topLevel = DirNameSegment
   130  		tenantDirName = DirNameAnonTenant
   131  	}
   132  	var b strings.Builder
   133  	b.WriteString(topLevel)
   134  	b.WriteByte('/')
   135  	b.WriteString(strconv.Itoa(int(shard)))
   136  	b.WriteByte('/')
   137  	b.WriteString(tenantDirName)
   138  	b.WriteByte('/')
   139  	return b.String()
   140  }
   141  
   142  func BuildObjectPath(tenant string, shard uint32, level uint32, block string) string {
   143  	topLevel := DirNameBlock
   144  	tenantDirName := tenant
   145  	if level == 0 {
   146  		topLevel = DirNameSegment
   147  		tenantDirName = DirNameAnonTenant
   148  	}
   149  	var b strings.Builder
   150  	b.WriteString(topLevel)
   151  	b.WriteByte('/')
   152  	b.WriteString(strconv.Itoa(int(shard)))
   153  	b.WriteByte('/')
   154  	b.WriteString(tenantDirName)
   155  	b.WriteByte('/')
   156  	b.WriteString(block)
   157  	b.WriteByte('/')
   158  	b.WriteString(FileNameDataObject)
   159  	return b.String()
   160  }
   161  
   162  func MetadataDLQObjectPath(md *metastorev1.BlockMeta) string {
   163  	var b strings.Builder
   164  	tenantDirName := DirNameAnonTenant
   165  	if md.CompactionLevel > 0 {
   166  		tenantDirName = metadata.Tenant(md)
   167  	}
   168  	b.WriteString(DirNameDLQ)
   169  	b.WriteByte('/')
   170  	b.WriteString(strconv.Itoa(int(md.Shard)))
   171  	b.WriteByte('/')
   172  	b.WriteString(tenantDirName)
   173  	b.WriteByte('/')
   174  	b.WriteString(md.Id)
   175  	b.WriteByte('/')
   176  	b.WriteString(FileNameMetadataObject)
   177  	return b.String()
   178  }
   179  
   180  func ParseBlockIDFromPath(path string) (ulid.ULID, error) {
   181  	tokens := strings.Split(path, "/")
   182  	if len(tokens) < 2 {
   183  		return ulid.ULID{}, fmt.Errorf("invalid path format: %s", path)
   184  	}
   185  	blockID, err := ulid.Parse(tokens[len(tokens)-2])
   186  	if err != nil {
   187  		return ulid.ULID{}, fmt.Errorf("expected ULID: %s: %w", path, err)
   188  	}
   189  	return blockID, nil
   190  }
   191  
   192  // Open opens the object, loading the data into memory if it's small enough.
   193  //
   194  // Open may be called multiple times concurrently, but the
   195  // object is only initialized once. While it is possible to open
   196  // the object repeatedly after close, the caller must pass the
   197  // failure reason to the "CloseWithError" call, preventing further
   198  // use, if applicable.
   199  func (obj *Object) Open(ctx context.Context) error {
   200  	return obj.refs.IncErr(func() error {
   201  		return obj.open(ctx)
   202  	})
   203  }
   204  
   205  func (obj *Object) open(ctx context.Context) (err error) {
   206  	if obj.err != nil {
   207  		// In case if the object has been already closed with an error,
   208  		// and then released, return the error immediately.
   209  		return obj.err
   210  	}
   211  	if len(obj.meta.Datasets) == 0 {
   212  		return nil
   213  	}
   214  	// Estimate the size of the sections to process, and load the
   215  	// data into memory, if it's small enough.
   216  	if obj.meta.Size > uint64(obj.memSize) {
   217  		// Otherwise, download the object to the local directory,
   218  		// if it's specified, and use the local file.
   219  		if obj.downloadDir != "" {
   220  			return obj.Download(ctx)
   221  		}
   222  		// The object will be read from the storage directly.
   223  		return nil
   224  	}
   225  	obj.buf = bufferpool.GetBuffer(int(obj.meta.Size))
   226  	defer func() {
   227  		if err != nil {
   228  			_ = obj.closeErr(err)
   229  		}
   230  	}()
   231  	if err = objstore.ReadRange(ctx, obj.buf, obj.path, obj.storage, 0, int64(obj.meta.Size)); err != nil {
   232  		return fmt.Errorf("loading object into memory %s: %w", obj.path, err)
   233  	}
   234  	return nil
   235  }
   236  
   237  func (obj *Object) Close() error {
   238  	return obj.CloseWithError(nil)
   239  }
   240  
   241  // CloseWithError closes the object, releasing all the acquired resources,
   242  // once the last reference is released. If the provided error is not nil,
   243  // the object will be marked as failed, preventing any further use.
   244  func (obj *Object) CloseWithError(err error) (closeErr error) {
   245  	obj.refs.Dec(func() {
   246  		closeErr = obj.closeErr(err)
   247  	})
   248  	return closeErr
   249  }
   250  
   251  func (obj *Object) closeErr(err error) (closeErr error) {
   252  	obj.err = err
   253  	if obj.buf != nil {
   254  		bufferpool.Put(obj.buf)
   255  		obj.buf = nil
   256  	}
   257  	if obj.local != nil {
   258  		closeErr = obj.local.Close()
   259  		obj.local = nil
   260  	}
   261  	return closeErr
   262  }
   263  
   264  func (obj *Object) Download(ctx context.Context) error {
   265  	dir := filepath.Join(obj.downloadDir, obj.meta.Id)
   266  	local, err := objstore.Download(ctx, obj.path, obj.storage, dir)
   267  	if err != nil {
   268  		return err
   269  	}
   270  	obj.storage = local
   271  	obj.local = local
   272  	return nil
   273  }
   274  
   275  func (obj *Object) Metadata() *metastorev1.BlockMeta { return obj.meta }
   276  
   277  func (obj *Object) SetMetadata(md *metastorev1.BlockMeta) { obj.meta = md }
   278  
   279  // ReadMetadata fetches the full block metadata from the storage.
   280  // It the object does not include the metadata offset, the method
   281  // returns the metadata entry the object was opened with.
   282  func (obj *Object) ReadMetadata(ctx context.Context) (*metastorev1.BlockMeta, error) {
   283  	if obj.meta.MetadataOffset == 0 {
   284  		return obj.meta, nil
   285  	}
   286  	offset := int64(obj.meta.MetadataOffset)
   287  	size := int64(obj.meta.Size) - offset
   288  	buf := bufferpool.GetBuffer(int(size))
   289  	defer bufferpool.Put(buf)
   290  	if err := objstore.ReadRange(ctx, buf, obj.path, obj.storage, offset, size); err != nil {
   291  		return nil, fmt.Errorf("reading block metadata %s: %w", obj.path, err)
   292  	}
   293  	var meta metastorev1.BlockMeta
   294  	if err := metadata.Decode(buf.B, &meta); err != nil {
   295  		return nil, fmt.Errorf("decoding block metadata %s: %w", obj.path, err)
   296  	}
   297  	// Size is not stored in the metadata, so we need to preserve it.
   298  	meta.Size = obj.meta.Size
   299  	return &meta, nil
   300  }
   301  
   302  func (obj *Object) IsNotExists(err error) bool {
   303  	return objstore.IsNotExist(obj.storage, err)
   304  }
   305  
   306  // ObjectsFromMetas binds block metas to corresponding objects in the storage.
   307  func ObjectsFromMetas(storage objstore.Bucket, blocks []*metastorev1.BlockMeta, options ...ObjectOption) Objects {
   308  	objects := make([]*Object, len(blocks))
   309  	for i, m := range blocks {
   310  		objects[i] = NewObject(storage, m, options...)
   311  	}
   312  	return objects
   313  }
   314  
   315  type Objects []*Object
   316  
   317  func (s Objects) Open(ctx context.Context) error {
   318  	g, ctx := errgroup.WithContext(ctx)
   319  	for i := range s {
   320  		i := i
   321  		g.Go(util.RecoverPanic(func() error {
   322  			return s[i].Open(ctx)
   323  		}))
   324  	}
   325  	return g.Wait()
   326  }
   327  
   328  func (s Objects) Close() error {
   329  	var m multierror.MultiError
   330  	for i := range s {
   331  		m.Add(s[i].Close())
   332  	}
   333  	return m.Err()
   334  }