github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/bucketindex/updater.go (about) 1 // SPDX-License-Identifier: AGPL-3.0-only 2 // Provenance-includes-location: https://github.com/cortexproject/cortex/blob/master/pkg/storage/tsdb/bucketindex/updater.go 3 // Provenance-includes-license: Apache-2.0 4 // Provenance-includes-copyright: The Cortex Authors. 5 6 package bucketindex 7 8 import ( 9 "context" 10 "encoding/json" 11 "io" 12 "path" 13 "time" 14 15 "github.com/go-kit/log" 16 "github.com/go-kit/log/level" 17 "github.com/grafana/dskit/runutil" 18 "github.com/oklog/ulid/v2" 19 "github.com/pkg/errors" 20 21 "github.com/grafana/pyroscope/pkg/objstore" 22 "github.com/grafana/pyroscope/pkg/phlaredb/block" 23 ) 24 25 var ( 26 ErrBlockMetaNotFound = block.ErrorSyncMetaNotFound 27 ErrBlockMetaCorrupted = block.ErrorSyncMetaCorrupted 28 ErrBlockDeletionMarkNotFound = errors.New("block deletion mark not found") 29 ErrBlockDeletionMarkCorrupted = errors.New("block deletion mark corrupted") 30 ) 31 32 // Updater is responsible to generate an update in-memory bucket index. 33 type Updater struct { 34 bkt objstore.InstrumentedBucket 35 logger log.Logger 36 } 37 38 func NewUpdater(bkt objstore.Bucket, userID string, cfgProvider objstore.TenantConfigProvider, logger log.Logger) *Updater { 39 return &Updater{ 40 bkt: objstore.NewTenantBucketClient(userID, bkt, cfgProvider), 41 logger: logger, 42 } 43 } 44 45 // UpdateIndex generates the bucket index and returns it, without storing it to the storage. 46 // If the old index is not passed in input, then the bucket index will be generated from scratch. 47 func (w *Updater) UpdateIndex(ctx context.Context, old *Index) (*Index, map[ulid.ULID]error, error) { 48 var oldBlocks []*Block 49 var oldBlockDeletionMarks []*BlockDeletionMark 50 51 // Use the old index if provided, and it is using the latest version format. 52 if old != nil && old.Version == IndexVersion3 { 53 oldBlocks = old.Blocks 54 oldBlockDeletionMarks = old.BlockDeletionMarks 55 } 56 57 blocks, partials, err := w.updateBlocks(ctx, oldBlocks) 58 if err != nil { 59 return nil, nil, err 60 } 61 62 blockDeletionMarks, err := w.updateBlockDeletionMarks(ctx, oldBlockDeletionMarks) 63 if err != nil { 64 return nil, nil, err 65 } 66 67 return &Index{ 68 Version: IndexVersion3, 69 Blocks: blocks, 70 BlockDeletionMarks: blockDeletionMarks, 71 UpdatedAt: time.Now().Unix(), 72 }, partials, nil 73 } 74 75 func (w *Updater) updateBlocks(ctx context.Context, old []*Block) (blocks []*Block, partials map[ulid.ULID]error, _ error) { 76 discovered := map[ulid.ULID]struct{}{} 77 partials = map[ulid.ULID]error{} 78 79 // Find all blocks in the storage. 80 err := w.bkt.Iter(ctx, "", func(name string) error { 81 if id, ok := block.IsBlockDir(name); ok { 82 discovered[id] = struct{}{} 83 } 84 return nil 85 }) 86 if err != nil { 87 return nil, nil, errors.Wrap(err, "list blocks") 88 } 89 90 // Since blocks are immutable, all blocks already existing in the index can just be copied. 91 for _, b := range old { 92 if _, ok := discovered[b.ID]; ok { 93 blocks = append(blocks, b) 94 delete(discovered, b.ID) 95 } 96 } 97 98 level.Info(w.logger).Log("msg", "listed all blocks in storage", "newly_discovered", len(discovered), "existing", len(old)) 99 100 // Remaining blocks are new ones and we have to fetch the meta.json for each of them, in order 101 // to find out if their upload has been completed (meta.json is uploaded last) and get the block 102 // information to store in the bucket index. 103 for id := range discovered { 104 b, err := w.updateBlockIndexEntry(ctx, id) 105 if err == nil { 106 blocks = append(blocks, b) 107 continue 108 } 109 110 if errors.Is(err, ErrBlockMetaNotFound) { 111 partials[id] = err 112 level.Warn(w.logger).Log("msg", "skipped partial block when updating bucket index", "block", id.String()) 113 continue 114 } 115 if errors.Is(err, ErrBlockMetaCorrupted) { 116 partials[id] = err 117 level.Error(w.logger).Log("msg", "skipped block with corrupted meta.json when updating bucket index", "block", id.String(), "err", err) 118 continue 119 } 120 return nil, nil, err 121 } 122 level.Info(w.logger).Log("msg", "fetched blocks metas for newly discovered blocks", "total_blocks", len(blocks), "partial_errors", len(partials)) 123 124 return blocks, partials, nil 125 } 126 127 func (w *Updater) updateBlockIndexEntry(ctx context.Context, id ulid.ULID) (*Block, error) { 128 // Set a generous timeout for fetching the meta.json and getting the attributes of the same file. 129 // This protects against operations that can take unbounded time. 130 ctx, cancel := context.WithTimeout(ctx, time.Minute) 131 defer cancel() 132 133 metaFile := path.Join(id.String(), block.MetaFilename) 134 135 // Get the block's meta.json file. 136 r, err := w.bkt.Get(ctx, metaFile) 137 if w.bkt.IsObjNotFoundErr(err) { 138 return nil, ErrBlockMetaNotFound 139 } 140 if err != nil { 141 return nil, errors.Wrapf(err, "get block meta file: %v", metaFile) 142 } 143 defer runutil.CloseWithLogOnErr(w.logger, r, "close get block meta file") 144 145 metaContent, err := io.ReadAll(r) 146 if err != nil { 147 return nil, errors.Wrapf(err, "read block meta file: %v", metaFile) 148 } 149 150 // Unmarshal it. 151 m := block.Meta{} 152 if err := json.Unmarshal(metaContent, &m); err != nil { 153 return nil, errors.Wrapf(ErrBlockMetaCorrupted, "unmarshal block meta file %s: %v", metaFile, err) 154 } 155 156 if !m.Version.IsValid() { 157 return nil, errors.Errorf("unexpected block meta version: %s version: %d", metaFile, m.Version) 158 } 159 160 block := BlockFromMeta(m) 161 162 // Get the meta.json attributes. 163 attrs, err := w.bkt.Attributes(ctx, metaFile) 164 if err != nil { 165 return nil, errors.Wrapf(err, "read meta file attributes: %v", metaFile) 166 } 167 168 // Since the meta.json file is the last file of a block being uploaded and it's immutable 169 // we can safely assume that the last modified timestamp of the meta.json is the time when 170 // the block has completed to be uploaded. 171 block.UploadedAt = attrs.LastModified.Unix() 172 173 return block, nil 174 } 175 176 func (w *Updater) updateBlockDeletionMarks(ctx context.Context, old []*BlockDeletionMark) ([]*BlockDeletionMark, error) { 177 out := make([]*BlockDeletionMark, 0, len(old)) 178 179 // Find all markers in the storage. 180 discovered, err := block.ListBlockDeletionMarks(ctx, w.bkt) 181 if err != nil { 182 return nil, err 183 } 184 185 level.Info(w.logger).Log("msg", "listed deletion markers", "count", len(discovered)) 186 187 // Since deletion marks are immutable, all markers already existing in the index can just be copied. 188 for _, m := range old { 189 if _, ok := discovered[m.ID]; ok { 190 out = append(out, m) 191 delete(discovered, m.ID) 192 } 193 } 194 195 // Remaining markers are new ones and we have to fetch them. 196 for id := range discovered { 197 m, err := w.updateBlockDeletionMarkIndexEntry(ctx, id) 198 if errors.Is(err, ErrBlockDeletionMarkNotFound) { 199 // This could happen if the block is permanently deleted between the "list objects" and now. 200 level.Warn(w.logger).Log("msg", "skipped missing block deletion mark when updating bucket index", "block", id.String()) 201 continue 202 } 203 if errors.Is(err, ErrBlockDeletionMarkCorrupted) { 204 level.Error(w.logger).Log("msg", "skipped corrupted block deletion mark when updating bucket index", "block", id.String(), "err", err) 205 continue 206 } 207 if err != nil { 208 return nil, err 209 } 210 211 out = append(out, m) 212 } 213 214 level.Info(w.logger).Log("msg", "updated deletion markers for recently marked blocks", "count", len(discovered), "total_deletion_markers", len(out)) 215 216 return out, nil 217 } 218 219 func (w *Updater) updateBlockDeletionMarkIndexEntry(ctx context.Context, id ulid.ULID) (*BlockDeletionMark, error) { 220 m := block.DeletionMark{} 221 222 if err := block.ReadMarker(ctx, w.logger, w.bkt, id.String(), &m); err != nil { 223 if errors.Is(err, block.ErrorMarkerNotFound) { 224 return nil, errors.Wrap(ErrBlockDeletionMarkNotFound, err.Error()) 225 } 226 if errors.Is(err, block.ErrorUnmarshalMarker) { 227 return nil, errors.Wrap(ErrBlockDeletionMarkCorrupted, err.Error()) 228 } 229 return nil, err 230 } 231 232 return DeletionMarkFromBlockMarker(&m), nil 233 }