github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/storage/tsdb/bucketindex/updater.go (about) 1 package bucketindex 2 3 import ( 4 "context" 5 "encoding/json" 6 "io/ioutil" 7 "path" 8 "time" 9 10 "github.com/go-kit/log" 11 "github.com/go-kit/log/level" 12 "github.com/grafana/dskit/runutil" 13 "github.com/oklog/ulid" 14 "github.com/pkg/errors" 15 "github.com/thanos-io/thanos/pkg/block" 16 "github.com/thanos-io/thanos/pkg/block/metadata" 17 "github.com/thanos-io/thanos/pkg/objstore" 18 19 "github.com/cortexproject/cortex/pkg/storage/bucket" 20 util_log "github.com/cortexproject/cortex/pkg/util/log" 21 ) 22 23 var ( 24 ErrBlockMetaNotFound = block.ErrorSyncMetaNotFound 25 ErrBlockMetaCorrupted = block.ErrorSyncMetaCorrupted 26 ErrBlockDeletionMarkNotFound = errors.New("block deletion mark not found") 27 ErrBlockDeletionMarkCorrupted = errors.New("block deletion mark corrupted") 28 ) 29 30 // Updater is responsible to generate an update in-memory bucket index. 31 type Updater struct { 32 bkt objstore.InstrumentedBucket 33 logger log.Logger 34 } 35 36 func NewUpdater(bkt objstore.Bucket, userID string, cfgProvider bucket.TenantConfigProvider, logger log.Logger) *Updater { 37 return &Updater{ 38 bkt: bucket.NewUserBucketClient(userID, bkt, cfgProvider), 39 logger: util_log.WithUserID(userID, logger), 40 } 41 } 42 43 // UpdateIndex generates the bucket index and returns it, without storing it to the storage. 44 // If the old index is not passed in input, then the bucket index will be generated from scratch. 45 func (w *Updater) UpdateIndex(ctx context.Context, old *Index) (*Index, map[ulid.ULID]error, error) { 46 var oldBlocks []*Block 47 var oldBlockDeletionMarks []*BlockDeletionMark 48 49 // Read the old index, if provided. 50 if old != nil { 51 oldBlocks = old.Blocks 52 oldBlockDeletionMarks = old.BlockDeletionMarks 53 } 54 55 blocks, partials, err := w.updateBlocks(ctx, oldBlocks) 56 if err != nil { 57 return nil, nil, err 58 } 59 60 blockDeletionMarks, err := w.updateBlockDeletionMarks(ctx, oldBlockDeletionMarks) 61 if err != nil { 62 return nil, nil, err 63 } 64 65 return &Index{ 66 Version: IndexVersion1, 67 Blocks: blocks, 68 BlockDeletionMarks: blockDeletionMarks, 69 UpdatedAt: time.Now().Unix(), 70 }, partials, nil 71 } 72 73 func (w *Updater) updateBlocks(ctx context.Context, old []*Block) (blocks []*Block, partials map[ulid.ULID]error, _ error) { 74 discovered := map[ulid.ULID]struct{}{} 75 partials = map[ulid.ULID]error{} 76 77 // Find all blocks in the storage. 78 err := w.bkt.Iter(ctx, "", func(name string) error { 79 if id, ok := block.IsBlockDir(name); ok { 80 discovered[id] = struct{}{} 81 } 82 return nil 83 }) 84 if err != nil { 85 return nil, nil, errors.Wrap(err, "list blocks") 86 } 87 88 // Since blocks are immutable, all blocks already existing in the index can just be copied. 89 for _, b := range old { 90 if _, ok := discovered[b.ID]; ok { 91 blocks = append(blocks, b) 92 delete(discovered, b.ID) 93 } 94 } 95 96 // Remaining blocks are new ones and we have to fetch the meta.json for each of them, in order 97 // to find out if their upload has been completed (meta.json is uploaded last) and get the block 98 // information to store in the bucket index. 99 for id := range discovered { 100 b, err := w.updateBlockIndexEntry(ctx, id) 101 if err == nil { 102 blocks = append(blocks, b) 103 continue 104 } 105 106 if errors.Is(err, ErrBlockMetaNotFound) { 107 partials[id] = err 108 level.Warn(w.logger).Log("msg", "skipped partial block when updating bucket index", "block", id.String()) 109 continue 110 } 111 if errors.Is(err, ErrBlockMetaCorrupted) { 112 partials[id] = err 113 level.Error(w.logger).Log("msg", "skipped block with corrupted meta.json when updating bucket index", "block", id.String(), "err", err) 114 continue 115 } 116 return nil, nil, err 117 } 118 119 return blocks, partials, nil 120 } 121 122 func (w *Updater) updateBlockIndexEntry(ctx context.Context, id ulid.ULID) (*Block, error) { 123 metaFile := path.Join(id.String(), block.MetaFilename) 124 125 // Get the block's meta.json file. 126 r, err := w.bkt.Get(ctx, metaFile) 127 if w.bkt.IsObjNotFoundErr(err) { 128 return nil, ErrBlockMetaNotFound 129 } 130 if err != nil { 131 return nil, errors.Wrapf(err, "get block meta file: %v", metaFile) 132 } 133 defer runutil.CloseWithLogOnErr(w.logger, r, "close get block meta file") 134 135 metaContent, err := ioutil.ReadAll(r) 136 if err != nil { 137 return nil, errors.Wrapf(err, "read block meta file: %v", metaFile) 138 } 139 140 // Unmarshal it. 141 m := metadata.Meta{} 142 if err := json.Unmarshal(metaContent, &m); err != nil { 143 return nil, errors.Wrapf(ErrBlockMetaCorrupted, "unmarshal block meta file %s: %v", metaFile, err) 144 } 145 146 if m.Version != metadata.TSDBVersion1 { 147 return nil, errors.Errorf("unexpected block meta version: %s version: %d", metaFile, m.Version) 148 } 149 150 block := BlockFromThanosMeta(m) 151 152 // Get the meta.json attributes. 153 attrs, err := w.bkt.Attributes(ctx, metaFile) 154 if err != nil { 155 return nil, errors.Wrapf(err, "read meta file attributes: %v", metaFile) 156 } 157 158 // Since the meta.json file is the last file of a block being uploaded and it's immutable 159 // we can safely assume that the last modified timestamp of the meta.json is the time when 160 // the block has completed to be uploaded. 161 block.UploadedAt = attrs.LastModified.Unix() 162 163 return block, nil 164 } 165 166 func (w *Updater) updateBlockDeletionMarks(ctx context.Context, old []*BlockDeletionMark) ([]*BlockDeletionMark, error) { 167 out := make([]*BlockDeletionMark, 0, len(old)) 168 discovered := map[ulid.ULID]struct{}{} 169 170 // Find all markers in the storage. 171 err := w.bkt.Iter(ctx, MarkersPathname+"/", func(name string) error { 172 if blockID, ok := IsBlockDeletionMarkFilename(path.Base(name)); ok { 173 discovered[blockID] = struct{}{} 174 } 175 return nil 176 }) 177 if err != nil { 178 return nil, errors.Wrap(err, "list block deletion marks") 179 } 180 181 // Since deletion marks are immutable, all markers already existing in the index can just be copied. 182 for _, m := range old { 183 if _, ok := discovered[m.ID]; ok { 184 out = append(out, m) 185 delete(discovered, m.ID) 186 } 187 } 188 189 // Remaining markers are new ones and we have to fetch them. 190 for id := range discovered { 191 m, err := w.updateBlockDeletionMarkIndexEntry(ctx, id) 192 if errors.Is(err, ErrBlockDeletionMarkNotFound) { 193 // This could happen if the block is permanently deleted between the "list objects" and now. 194 level.Warn(w.logger).Log("msg", "skipped missing block deletion mark when updating bucket index", "block", id.String()) 195 continue 196 } 197 if errors.Is(err, ErrBlockDeletionMarkCorrupted) { 198 level.Error(w.logger).Log("msg", "skipped corrupted block deletion mark when updating bucket index", "block", id.String(), "err", err) 199 continue 200 } 201 if err != nil { 202 return nil, err 203 } 204 205 out = append(out, m) 206 } 207 208 return out, nil 209 } 210 211 func (w *Updater) updateBlockDeletionMarkIndexEntry(ctx context.Context, id ulid.ULID) (*BlockDeletionMark, error) { 212 m := metadata.DeletionMark{} 213 214 if err := metadata.ReadMarker(ctx, w.logger, w.bkt, id.String(), &m); err != nil { 215 if errors.Is(err, metadata.ErrorMarkerNotFound) { 216 return nil, errors.Wrap(ErrBlockDeletionMarkNotFound, err.Error()) 217 } 218 if errors.Is(err, metadata.ErrorUnmarshalMarker) { 219 return nil, errors.Wrap(ErrBlockDeletionMarkCorrupted, err.Error()) 220 } 221 return nil, err 222 } 223 224 return BlockDeletionMarkFromThanosMarker(&m), nil 225 }