github.com/grafana/pyroscope@v1.18.0/pkg/metastore/index/store/shard.go (about)

     1  package store
     2  
     3  import (
     4  	"encoding/binary"
     5  	"errors"
     6  	"fmt"
     7  	"strconv"
     8  	"strings"
     9  
    10  	"go.etcd.io/bbolt"
    11  
    12  	metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1"
    13  	"github.com/grafana/pyroscope/pkg/block/metadata"
    14  	"github.com/grafana/pyroscope/pkg/metastore/store"
    15  	multitenancy "github.com/grafana/pyroscope/pkg/tenant"
    16  )
    17  
const (
	// tenantShardStringsBucketName is the name of the sub-bucket, nested in
	// a tenant shard bucket, where Store writes string table chunks and
	// loadTenantShard reads them back.
	tenantShardStringsBucketName = ".strings"
	// tenantShardIndexKeyName is the key in a tenant shard bucket under
	// which the serialized ShardIndex is stored.
	tenantShardIndexKeyName      = ".index"
)

var (
	// ErrInvalidStringTable and ErrInvalidShardIndex are not returned by
	// the code in this part of the file.
	// NOTE(review): presumably they are returned by the string table and
	// shard index decoders defined elsewhere in the package — confirm.
	ErrInvalidStringTable = errors.New("malformed string table")
	ErrInvalidShardIndex  = errors.New("malformed shard index")
)

var (
	// Byte-slice forms of the reserved names above, kept at package level
	// so they can be passed to bbolt without converting on every call.
	tenantShardIndexKeyNameBytes      = []byte(tenantShardIndexKeyName)
	tenantShardStringsBucketNameBytes = []byte(tenantShardStringsBucketName)

	// blockCursorSkipPrefix is assigned to the SkipPrefix field of the
	// iterator returned by Shard.Blocks. Both reserved names above start
	// with '.'.
	// NOTE(review): presumably the iterator skips keys with this prefix so
	// that only block entries are visited — confirm in store.CursorIterator.
	blockCursorSkipPrefix = []byte{'.'}
)
    34  
// Shard is the in-memory state of a single tenant shard of a partition.
// It is identified by the (Partition, Tenant, Shard) triple, which is also
// used to locate the shard bucket in the database.
type Shard struct {
	Partition   Partition
	Tenant      string
	Shard       uint32
	// ShardIndex carries the MinTime/MaxTime bounds that Store widens as
	// blocks are added; it is persisted under the ".index" key.
	ShardIndex  ShardIndex
	// StringTable is the string table of the shard. Store imports block
	// metadata into it and persists the strings appended by the import.
	StringTable *metadata.StringTable

	// TODO(kolesnikovae): Build a skip index for labels.
	// Labels *metadata.StringTable
}
    45  
    46  func NewShard(p Partition, tenant string, shard uint32) *Shard {
    47  	return &Shard{
    48  		Partition:   p,
    49  		Tenant:      tenant,
    50  		Shard:       shard,
    51  		StringTable: metadata.NewStringTable(),
    52  		ShardIndex:  ShardIndex{},
    53  	}
    54  }
    55  
    56  func (s *Shard) Store(tx *bbolt.Tx, md *metastorev1.BlockMeta) error {
    57  	shardBucket, err := getOrCreateTenantShardBucket(tx, s.Partition, s.Tenant, s.Shard)
    58  	if err != nil {
    59  		return err
    60  	}
    61  
    62  	n := len(s.StringTable.Strings)
    63  	s.StringTable.Import(md)
    64  	if added := s.StringTable.Strings[n:]; len(added) > 0 {
    65  		stringTable, err := getOrCreateSubBucket(shardBucket, tenantShardStringsBucketNameBytes)
    66  		if err != nil {
    67  			return err
    68  		}
    69  		k := binary.BigEndian.AppendUint32(nil, uint32(n))
    70  		v := encodeStrings(added)
    71  		if err = stringTable.Put(k, v); err != nil {
    72  			return err
    73  		}
    74  	}
    75  	md.StringTable = nil
    76  	value, err := md.MarshalVT()
    77  	if err != nil {
    78  		return err
    79  	}
    80  
    81  	var updateIndex bool
    82  	if s.ShardIndex.MinTime == 0 || s.ShardIndex.MinTime > md.MinTime {
    83  		s.ShardIndex.MinTime = md.MinTime
    84  		updateIndex = true
    85  	}
    86  	if s.ShardIndex.MaxTime < md.MaxTime {
    87  		s.ShardIndex.MaxTime = md.MaxTime
    88  		updateIndex = true
    89  	}
    90  	if updateIndex {
    91  		if err = shardBucket.Put(tenantShardIndexKeyNameBytes, s.ShardIndex.MarshalBinary()); err != nil {
    92  			return err
    93  		}
    94  	}
    95  
    96  	return shardBucket.Put([]byte(md.Id), value)
    97  }
    98  
    99  func (s *Shard) Find(tx *bbolt.Tx, blocks ...string) []store.KV {
   100  	bucket := getTenantShardBucket(tx, s.Partition, s.Tenant, s.Shard)
   101  	if bucket == nil {
   102  		return nil
   103  	}
   104  	kv := make([]store.KV, 0, len(blocks))
   105  	for _, b := range blocks {
   106  		k := []byte(b)
   107  		if v := bucket.Get(k); v != nil {
   108  			kv = append(kv, store.KV{Key: k, Value: v})
   109  		}
   110  	}
   111  	return kv
   112  }
   113  
   114  func (s *Shard) Blocks(tx *bbolt.Tx) *store.CursorIterator {
   115  	bucket := getTenantShardBucket(tx, s.Partition, s.Tenant, s.Shard)
   116  	if bucket == nil {
   117  		return nil
   118  	}
   119  	cursor := store.NewCursorIter(bucket.Cursor())
   120  	cursor.SkipPrefix = blockCursorSkipPrefix
   121  	return cursor
   122  }
   123  
   124  func (s *Shard) Delete(tx *bbolt.Tx, blocks ...string) error {
   125  	tenantShard := getTenantShardBucket(tx, s.Partition, s.Tenant, s.Shard)
   126  	if tenantShard == nil {
   127  		return nil
   128  	}
   129  	for _, b := range blocks {
   130  		if err := tenantShard.Delete([]byte(b)); err != nil {
   131  			return err
   132  		}
   133  	}
   134  	return nil
   135  }
   136  
   137  func (s *Shard) TombstoneName() string {
   138  	var b strings.Builder
   139  	b.WriteString(s.Partition.String())
   140  	b.WriteByte('-')
   141  	b.WriteByte('T')
   142  	if s.Tenant != "" {
   143  		b.WriteString(s.Tenant)
   144  	} else {
   145  		b.WriteString(multitenancy.DefaultTenantID)
   146  	}
   147  	b.WriteByte('-')
   148  	b.WriteByte('S')
   149  	b.WriteString(strconv.FormatUint(uint64(s.Shard), 10))
   150  	return b.String()
   151  }
   152  
   153  // ShallowCopy creates a shallow copy: no deep copy of the string table.
   154  // The copy can be accessed safely by multiple readers, and it represents
   155  // a snapshot of the shard including the string table.
   156  //
   157  // Strings added after the copy is made won't be visible to the reader.
   158  // The writer MUST invalidate the cache before access: copies in-use can
   159  // still be used (strings is a header copy of append-only slice).
   160  func (s *Shard) ShallowCopy() *Shard {
   161  	return &Shard{
   162  		Partition:  s.Partition,
   163  		Tenant:     s.Tenant,
   164  		Shard:      s.Shard,
   165  		ShardIndex: s.ShardIndex,
   166  		StringTable: &metadata.StringTable{
   167  			Strings: s.StringTable.Strings,
   168  		},
   169  	}
   170  }
   171  
   172  func getTenantShardBucket(tx *bbolt.Tx, p Partition, tenant string, shard uint32) *bbolt.Bucket {
   173  	if partition := getPartitionsBucket(tx).Bucket(p.Bytes()); partition != nil {
   174  		if shards := partition.Bucket(tenantBucketName(tenant)); shards != nil {
   175  			return shards.Bucket(binary.BigEndian.AppendUint32(nil, shard))
   176  		}
   177  	}
   178  	return nil
   179  }
   180  
   181  func getOrCreateTenantShardBucket(tx *bbolt.Tx, p Partition, tenant string, shard uint32) (*bbolt.Bucket, error) {
   182  	partition, err := getOrCreateSubBucket(getPartitionsBucket(tx), p.Bytes())
   183  	if err != nil {
   184  		return nil, fmt.Errorf("error creating partition bucket for %s: %w", p, err)
   185  	}
   186  	shards, err := getOrCreateSubBucket(partition, tenantBucketName(tenant))
   187  	if err != nil {
   188  		return nil, fmt.Errorf("error creating shard bucket for tenant %s in parititon %v: %w", tenant, p, err)
   189  	}
   190  	tenantShard, err := getOrCreateSubBucket(shards, binary.BigEndian.AppendUint32(nil, shard))
   191  	if err != nil {
   192  		return nil, fmt.Errorf("error creating shard bucket for partiton %s and shard %d: %w", p, shard, err)
   193  	}
   194  	return tenantShard, nil
   195  }
   196  
   197  func loadTenantShard(tx *bbolt.Tx, p Partition, tenant string, shard uint32) (*Shard, error) {
   198  	shardBucket := getTenantShardBucket(tx, p, tenant, shard)
   199  	if shardBucket == nil {
   200  		return nil, nil
   201  	}
   202  
   203  	s := NewShard(p, tenant, shard)
   204  	stringTable := shardBucket.Bucket(tenantShardStringsBucketNameBytes)
   205  	if stringTable == nil {
   206  		return s, nil
   207  	}
   208  	stringsIter := newStringIter(store.NewCursorIter(stringTable.Cursor()))
   209  	defer func() {
   210  		_ = stringsIter.Close()
   211  	}()
   212  	var err error
   213  	if err = s.StringTable.Load(stringsIter); err != nil {
   214  		return nil, err
   215  	}
   216  
   217  	if b := shardBucket.Get(tenantShardIndexKeyNameBytes); len(b) > 0 {
   218  		if err = s.ShardIndex.UnmarshalBinary(b); err != nil {
   219  			return nil, err
   220  		}
   221  	}
   222  
   223  	return s, nil
   224  }