github.com/grafana/pyroscope@v1.18.0/pkg/metastore/index/store/shard.go (about) 1 package store 2 3 import ( 4 "encoding/binary" 5 "errors" 6 "fmt" 7 "strconv" 8 "strings" 9 10 "go.etcd.io/bbolt" 11 12 metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1" 13 "github.com/grafana/pyroscope/pkg/block/metadata" 14 "github.com/grafana/pyroscope/pkg/metastore/store" 15 multitenancy "github.com/grafana/pyroscope/pkg/tenant" 16 ) 17 18 const ( 19 tenantShardStringsBucketName = ".strings" 20 tenantShardIndexKeyName = ".index" 21 ) 22 23 var ( 24 ErrInvalidStringTable = errors.New("malformed string table") 25 ErrInvalidShardIndex = errors.New("malformed shard index") 26 ) 27 28 var ( 29 tenantShardIndexKeyNameBytes = []byte(tenantShardIndexKeyName) 30 tenantShardStringsBucketNameBytes = []byte(tenantShardStringsBucketName) 31 32 blockCursorSkipPrefix = []byte{'.'} 33 ) 34 35 type Shard struct { 36 Partition Partition 37 Tenant string 38 Shard uint32 39 ShardIndex ShardIndex 40 StringTable *metadata.StringTable 41 42 // TODO(kolesnikovae): Build a skip index for labels. 43 // Labels *metadata.StringTable 44 } 45 46 func NewShard(p Partition, tenant string, shard uint32) *Shard { 47 return &Shard{ 48 Partition: p, 49 Tenant: tenant, 50 Shard: shard, 51 StringTable: metadata.NewStringTable(), 52 ShardIndex: ShardIndex{}, 53 } 54 } 55 56 func (s *Shard) Store(tx *bbolt.Tx, md *metastorev1.BlockMeta) error { 57 shardBucket, err := getOrCreateTenantShardBucket(tx, s.Partition, s.Tenant, s.Shard) 58 if err != nil { 59 return err 60 } 61 62 n := len(s.StringTable.Strings) 63 s.StringTable.Import(md) 64 if added := s.StringTable.Strings[n:]; len(added) > 0 { 65 stringTable, err := getOrCreateSubBucket(shardBucket, tenantShardStringsBucketNameBytes) 66 if err != nil { 67 return err 68 } 69 k := binary.BigEndian.AppendUint32(nil, uint32(n)) 70 v := encodeStrings(added) 71 if err = stringTable.Put(k, v); err != nil { 72 return err 73 } 74 } 75 md.StringTable = nil 76 value, err := md.MarshalVT() 77 if err != nil { 78 return err 79 } 80 81 var updateIndex bool 82 if s.ShardIndex.MinTime == 0 || s.ShardIndex.MinTime > md.MinTime { 83 s.ShardIndex.MinTime = md.MinTime 84 updateIndex = true 85 } 86 if s.ShardIndex.MaxTime < md.MaxTime { 87 s.ShardIndex.MaxTime = md.MaxTime 88 updateIndex = true 89 } 90 if updateIndex { 91 if err = shardBucket.Put(tenantShardIndexKeyNameBytes, s.ShardIndex.MarshalBinary()); err != nil { 92 return err 93 } 94 } 95 96 return shardBucket.Put([]byte(md.Id), value) 97 } 98 99 func (s *Shard) Find(tx *bbolt.Tx, blocks ...string) []store.KV { 100 bucket := getTenantShardBucket(tx, s.Partition, s.Tenant, s.Shard) 101 if bucket == nil { 102 return nil 103 } 104 kv := make([]store.KV, 0, len(blocks)) 105 for _, b := range blocks { 106 k := []byte(b) 107 if v := bucket.Get(k); v != nil { 108 kv = append(kv, store.KV{Key: k, Value: v}) 109 } 110 } 111 return kv 112 } 113 114 func (s *Shard) Blocks(tx *bbolt.Tx) *store.CursorIterator { 115 bucket := getTenantShardBucket(tx, s.Partition, s.Tenant, s.Shard) 116 if bucket == nil { 117 return nil 118 } 119 cursor := store.NewCursorIter(bucket.Cursor()) 120 cursor.SkipPrefix = blockCursorSkipPrefix 121 return cursor 122 } 123 124 func (s *Shard) Delete(tx *bbolt.Tx, blocks ...string) error { 125 tenantShard := getTenantShardBucket(tx, s.Partition, s.Tenant, s.Shard) 126 if tenantShard == nil { 127 return nil 128 } 129 for _, b := range blocks { 130 if err := tenantShard.Delete([]byte(b)); err != nil { 131 return err 132 } 133 } 134 return nil 135 } 136 137 func (s *Shard) TombstoneName() string { 138 var b strings.Builder 139 b.WriteString(s.Partition.String()) 140 b.WriteByte('-') 141 b.WriteByte('T') 142 if s.Tenant != "" { 143 b.WriteString(s.Tenant) 144 } else { 145 b.WriteString(multitenancy.DefaultTenantID) 146 } 147 b.WriteByte('-') 148 b.WriteByte('S') 149 b.WriteString(strconv.FormatUint(uint64(s.Shard), 10)) 150 return b.String() 151 } 152 153 // ShallowCopy creates a shallow copy: no deep copy of the string table. 154 // The copy can be accessed safely by multiple readers, and it represents 155 // a snapshot of the shard including the string table. 156 // 157 // Strings added after the copy is made won't be visible to the reader. 158 // The writer MUST invalidate the cache before access: copies in-use can 159 // still be used (strings is a header copy of append-only slice). 160 func (s *Shard) ShallowCopy() *Shard { 161 return &Shard{ 162 Partition: s.Partition, 163 Tenant: s.Tenant, 164 Shard: s.Shard, 165 ShardIndex: s.ShardIndex, 166 StringTable: &metadata.StringTable{ 167 Strings: s.StringTable.Strings, 168 }, 169 } 170 } 171 172 func getTenantShardBucket(tx *bbolt.Tx, p Partition, tenant string, shard uint32) *bbolt.Bucket { 173 if partition := getPartitionsBucket(tx).Bucket(p.Bytes()); partition != nil { 174 if shards := partition.Bucket(tenantBucketName(tenant)); shards != nil { 175 return shards.Bucket(binary.BigEndian.AppendUint32(nil, shard)) 176 } 177 } 178 return nil 179 } 180 181 func getOrCreateTenantShardBucket(tx *bbolt.Tx, p Partition, tenant string, shard uint32) (*bbolt.Bucket, error) { 182 partition, err := getOrCreateSubBucket(getPartitionsBucket(tx), p.Bytes()) 183 if err != nil { 184 return nil, fmt.Errorf("error creating partition bucket for %s: %w", p, err) 185 } 186 shards, err := getOrCreateSubBucket(partition, tenantBucketName(tenant)) 187 if err != nil { 188 return nil, fmt.Errorf("error creating shard bucket for tenant %s in parititon %v: %w", tenant, p, err) 189 } 190 tenantShard, err := getOrCreateSubBucket(shards, binary.BigEndian.AppendUint32(nil, shard)) 191 if err != nil { 192 return nil, fmt.Errorf("error creating shard bucket for partiton %s and shard %d: %w", p, shard, err) 193 } 194 return tenantShard, nil 195 } 196 197 func loadTenantShard(tx *bbolt.Tx, p Partition, tenant string, shard uint32) (*Shard, error) { 198 shardBucket := getTenantShardBucket(tx, p, tenant, shard) 199 if shardBucket == nil { 200 return nil, nil 201 } 202 203 s := NewShard(p, tenant, shard) 204 stringTable := shardBucket.Bucket(tenantShardStringsBucketNameBytes) 205 if stringTable == nil { 206 return s, nil 207 } 208 stringsIter := newStringIter(store.NewCursorIter(stringTable.Cursor())) 209 defer func() { 210 _ = stringsIter.Close() 211 }() 212 var err error 213 if err = s.StringTable.Load(stringsIter); err != nil { 214 return nil, err 215 } 216 217 if b := shardBucket.Get(tenantShardIndexKeyNameBytes); len(b) > 0 { 218 if err = s.ShardIndex.UnmarshalBinary(b); err != nil { 219 return nil, err 220 } 221 } 222 223 return s, nil 224 }