github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/shipper/shipper.go

// Copyright (c) The Thanos Authors.
// Licensed under the Apache License 2.0.

// Package shipper detects directories on the local file system and uploads
// them to a block storage.

// TODO: Fix attribution

package shipper

import (
	"context"
	"encoding/json"
	"math"
	"os"
	"path"
	"path/filepath"
	"sort"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/grafana/dskit/runutil"
	"github.com/oklog/ulid/v2"
	"github.com/pkg/errors"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
	"github.com/prometheus/common/model"
	"github.com/prometheus/prometheus/tsdb"
	"github.com/prometheus/prometheus/tsdb/fileutil"
	"github.com/thanos-io/objstore"

	"github.com/grafana/pyroscope/pkg/phlaredb/block"
)

type metrics struct {
	dirSyncs          prometheus.Counter
	dirSyncFailures   prometheus.Counter
	uploads           prometheus.Counter
	uploadFailures    prometheus.Counter
	uploadedCompacted prometheus.Gauge
}

func newMetrics(reg prometheus.Registerer, uploadCompacted bool) *metrics {
	var m metrics

	m.dirSyncs = promauto.With(reg).NewCounter(prometheus.CounterOpts{
		Name: "pyroscope_shipper_dir_syncs_total",
		Help: "Total number of dir syncs",
	})
	m.dirSyncFailures = promauto.With(reg).NewCounter(prometheus.CounterOpts{
		Name: "pyroscope_shipper_dir_sync_failures_total",
		Help: "Total number of failed dir syncs",
	})
	m.uploads = promauto.With(reg).NewCounter(prometheus.CounterOpts{
		Name: "pyroscope_shipper_uploads_total",
		Help: "Total number of uploaded blocks",
	})
	m.uploadFailures = promauto.With(reg).NewCounter(prometheus.CounterOpts{
		Name: "pyroscope_shipper_upload_failures_total",
		Help: "Total number of block upload failures",
	})
	uploadCompactedGaugeOpts := prometheus.GaugeOpts{
		Name: "pyroscope_shipper_upload_compacted_done",
		Help: "If 1 it means shipper uploaded all compacted blocks from the filesystem.",
	}
	if uploadCompacted {
		m.uploadedCompacted = promauto.With(reg).NewGauge(uploadCompactedGaugeOpts)
	} else {
		m.uploadedCompacted = promauto.With(nil).NewGauge(uploadCompactedGaugeOpts)
	}
	return &m
}
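// Editorial note (illustrative, not from the original file): the else branch
// above relies on documented promauto behavior: With(nil) returns a factory
// whose metrics are created but never registered, so the gauge can be Set
// unconditionally while staying invisible to scrapes. A minimal sketch of the
// same pattern, with a hypothetical metric name:
//
//	var reg prometheus.Registerer // nil: create the metric without registering it
//	g := promauto.With(reg).NewGauge(prometheus.GaugeOpts{Name: "example_noop_gauge"})
//	g.Set(1) // safe either way; no registry ever sees the gauge when reg is nil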
// Shipper watches a directory for matching files and directories and uploads
// them to a remote data store.
type Shipper struct {
	logger      log.Logger
	metrics     *metrics
	bucket      objstore.Bucket
	blockLister BlockLister
	source      block.SourceType

	uploadCompacted        bool
	allowOutOfOrderUploads bool
}

// New creates a new shipper that detects new TSDB blocks in the local data
// directory and uploads them to the remote store if necessary. It attaches the
// Thanos metadata section to each meta JSON file. If uploadCompacted is
// enabled, it also uploads compacted blocks that are already on the filesystem.
func New(
	logger log.Logger,
	r prometheus.Registerer,
	blockLister BlockLister,
	bucket objstore.Bucket,
	source block.SourceType,
	uploadCompacted bool,
	allowOutOfOrderUploads bool,
) *Shipper {
	if logger == nil {
		logger = log.NewNopLogger()
	}

	return &Shipper{
		logger:                 logger,
		blockLister:            blockLister,
		bucket:                 bucket,
		metrics:                newMetrics(r, uploadCompacted),
		source:                 source,
		allowOutOfOrderUploads: allowOutOfOrderUploads,
		uploadCompacted:        uploadCompacted,
	}
}

// BlockLister provides the local data directory and the metas of the blocks it
// currently contains.
type BlockLister interface {
	LocalDataPath() string
	BlockMetas(ctx context.Context) ([]*block.Meta, error)
}
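// Illustrative sketch (not from the original file): a minimal BlockLister can
// be backed by a fixed directory and a static slice of metas, e.g. in tests.
// The staticLister name and its fields are assumptions for illustration only:
//
//	type staticLister struct {
//		dir   string
//		metas []*block.Meta
//	}
//
//	func (l *staticLister) LocalDataPath() string { return l.dir }
//
//	func (l *staticLister) BlockMetas(ctx context.Context) ([]*block.Meta, error) {
//		return l.metas, nil
//	}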
// Timestamps returns the minimum timestamp for which data is available and the
// highest timestamp of blocks that were successfully uploaded.
func (s *Shipper) Timestamps() (minTime, maxSyncTime model.Time, err error) {
	ctx := context.Background()
	meta, err := ReadMetaFile(s.blockLister.LocalDataPath())
	if err != nil {
		return 0, 0, errors.Wrap(err, "read shipper meta file")
	}
	// Build a map of blocks we already uploaded.
	hasUploaded := make(map[ulid.ULID]struct{}, len(meta.Uploaded))
	for _, id := range meta.Uploaded {
		hasUploaded[id] = struct{}{}
	}

	minTime = model.Time(math.MaxInt64)
	maxSyncTime = model.Time(math.MinInt64)

	metas, err := s.blockLister.BlockMetas(ctx)
	if err != nil {
		return 0, 0, err
	}
	for _, m := range metas {
		if m.MinTime < minTime {
			minTime = m.MinTime
		}
		if _, ok := hasUploaded[m.ULID]; ok && m.MaxTime > maxSyncTime {
			maxSyncTime = m.MaxTime
		}
	}

	if minTime == math.MaxInt64 {
		// No blocks found yet. We cannot assume any minimum block size, so propagate a zero minTime.
		minTime = 0
	}
	return minTime, maxSyncTime, nil
}

// lazyOverlapChecker downloads the metas of all blocks in the bucket on first
// use and checks new blocks for time-range overlaps against them.
type lazyOverlapChecker struct {
	synced bool
	logger log.Logger
	bucket objstore.Bucket

	metas       []tsdb.BlockMeta
	lookupMetas map[ulid.ULID]struct{}
}

func newLazyOverlapChecker(logger log.Logger, bucket objstore.Bucket) *lazyOverlapChecker {
	return &lazyOverlapChecker{
		logger: logger,
		bucket: bucket,

		lookupMetas: map[ulid.ULID]struct{}{},
	}
}

func (c *lazyOverlapChecker) sync(ctx context.Context) error {
	if err := c.bucket.Iter(ctx, "", func(path string) error {
		id, ok := block.IsBlockDir(path)
		if !ok {
			return nil
		}

		m, err := block.DownloadMeta(ctx, c.logger, c.bucket, id)
		if err != nil {
			return err
		}

		c.metas = append(c.metas, m.TSDBBlockMeta())
		c.lookupMetas[m.ULID] = struct{}{}
		return nil
	}); err != nil {
		return errors.Wrap(err, "get all block meta")
	}

	c.synced = true
	return nil
}

func (c *lazyOverlapChecker) IsOverlapping(ctx context.Context, newMeta tsdb.BlockMeta) error {
	if !c.synced {
		level.Info(c.logger).Log("msg", "gathering all existing blocks from the remote bucket for check", "id", newMeta.ULID.String())
		if err := c.sync(ctx); err != nil {
			return err
		}
	}

	// tsdb.OverlappingBlocks expects its input sorted by MinTime, so sort the
	// combined slice before checking.
	metas := append([]tsdb.BlockMeta{newMeta}, c.metas...)
	sort.Slice(metas, func(i, j int) bool {
		return metas[i].MinTime < metas[j].MinTime
	})
	if o := tsdb.OverlappingBlocks(metas); len(o) > 0 {
		// TODO(bwplotka): Consider checking whether the overlap relates to the block in question.
		return errors.Errorf("shipping compacted block %s is blocked; overlap spotted: %s", newMeta.ULID, o.String())
	}
	return nil
}

// Sync performs a single synchronization, which ensures all non-compacted
// local blocks have been uploaded to the object bucket once.
//
// It is not concurrency-safe, however it is compactor-safe (running
// concurrently with the compactor is OK).
func (s *Shipper) Sync(ctx context.Context) (uploaded int, err error) {
	meta, err := ReadMetaFile(s.blockLister.LocalDataPath())
	if err != nil {
		// If we encounter any error, proceed with an empty meta file and overwrite it later.
		// The meta file is only used to avoid unnecessary bucket.Exists calls,
		// which are handled properly by the system if they occur anyway.
		if !os.IsNotExist(err) {
			level.Warn(s.logger).Log("msg", "reading meta file failed, will override it", "err", err)
		}
		meta = &Meta{Version: MetaVersion1}
	}

	// Build a map of blocks we already uploaded.
	hasUploaded := make(map[ulid.ULID]struct{}, len(meta.Uploaded))
	for _, id := range meta.Uploaded {
		hasUploaded[id] = struct{}{}
	}

	// Reset the uploaded slice so we can rebuild it only with blocks that still exist locally.
	meta.Uploaded = nil

	var (
		checker    = newLazyOverlapChecker(s.logger, s.bucket)
		uploadErrs int
	)

	metas, err := s.blockLister.BlockMetas(ctx)
	if err != nil {
		return 0, err
	}
	for _, m := range metas {
		// Do not sync a block if we already uploaded or ignored it. If it's no longer
		// found in the bucket, it was generally removed by the compaction process.
		if _, uploaded := hasUploaded[m.ULID]; uploaded {
			meta.Uploaded = append(meta.Uploaded, m.ULID)
			continue
		}

		// In the normal flow we only ship blocks at the first compaction level.
		if m.Compaction.Level > 1 {
			if !s.uploadCompacted {
				continue
			}
		}

		// Check against the bucket whether the meta file for this block exists.
		ok, err := s.bucket.Exists(ctx, path.Join(m.ULID.String(), block.MetaFilename))
		if err != nil {
			return 0, errors.Wrap(err, "check exists")
		}
		if ok {
			meta.Uploaded = append(meta.Uploaded, m.ULID)
			continue
		}

		// The overlap check is skipped when out-of-order uploads are allowed.
		if m.Compaction.Level > 1 && !s.allowOutOfOrderUploads {
			if err := checker.IsOverlapping(ctx, m.TSDBBlockMeta()); err != nil {
				return 0, errors.Errorf("found overlap or error during sync, cannot upload compacted block, details: %v", err)
			}
		}

		if err := s.upload(ctx, m); err != nil {
			if !s.allowOutOfOrderUploads {
				return 0, errors.Wrapf(err, "upload %v", m.ULID)
			}

			// No error is returned, only a log line, so that the remaining blocks still
			// get uploaded; this block will be retried on the next Sync iteration.
			level.Error(s.logger).Log("msg", "shipping failed", "block", m.ULID, "err", err)
			uploadErrs++
			continue
		}
		meta.Uploaded = append(meta.Uploaded, m.ULID)
		uploaded++
		s.metrics.uploads.Inc()
	}
	if err := WriteMetaFile(s.logger, s.blockLister.LocalDataPath(), meta); err != nil {
		level.Warn(s.logger).Log("msg", "updating meta file failed", "err", err)
	}

	s.metrics.dirSyncs.Inc()
	if uploadErrs > 0 {
		s.metrics.uploadFailures.Add(float64(uploadErrs))
		return uploaded, errors.Errorf("failed to sync %v blocks", uploadErrs)
	}

	if s.uploadCompacted {
		s.metrics.uploadedCompacted.Set(1)
	}
	return uploaded, nil
}
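// Usage sketch (illustrative, not from the original file): callers typically
// run Sync on a fixed interval. The lister, bkt, and src values are assumed
// to be provided by the caller:
//
//	sh := New(logger, prometheus.DefaultRegisterer, lister, bkt, src, false, false)
//	ticker := time.NewTicker(time.Minute)
//	defer ticker.Stop()
//	for range ticker.C {
//		if n, err := sh.Sync(ctx); err != nil {
//			level.Warn(logger).Log("msg", "shipper sync failed", "uploaded", n, "err", err)
//		}
//	}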
// upload uploads the block if it does not already exist in remote storage.
// TODO(khyatisoneji): Double-check that the block does not have a deletion-mark.json for some reason; otherwise log it or return an error.
func (s *Shipper) upload(ctx context.Context, meta *block.Meta) error {
	level.Info(s.logger).Log("msg", "upload new block", "id", meta.ULID)

	updir := filepath.Join(s.blockLister.LocalDataPath(), meta.ULID.String())

	meta.Source = s.source
	if _, err := meta.WriteToFile(s.logger, updir); err != nil {
		return errors.Wrap(err, "write meta file")
	}
	return block.Upload(ctx, s.logger, s.bucket, updir)
}

// Meta defines the format of the shipper.json file that the shipper places in
// the data directory.
type Meta struct {
	Version  int         `json:"version"`
	Uploaded []ulid.ULID `json:"uploaded"`
}

const (
	// MetaFilename is the known JSON filename for meta information.
	MetaFilename = "shipper.json"

	// MetaVersion1 represents version 1 of the meta file.
	MetaVersion1 = 1
)

// WriteMetaFile writes the given meta into <dir>/shipper.json.
func WriteMetaFile(logger log.Logger, dir string, meta *Meta) error {
	// Make any changes to the file appear atomic.
	path := filepath.Join(dir, MetaFilename)
	tmp := path + ".tmp"

	f, err := os.Create(tmp)
	if err != nil {
		return err
	}

	enc := json.NewEncoder(f)
	enc.SetIndent("", "\t")

	if err := enc.Encode(meta); err != nil {
		runutil.CloseWithLogOnErr(logger, f, "write meta file close")
		return err
	}
	if err := f.Close(); err != nil {
		return err
	}
	return renameFile(logger, tmp, path)
}

// ReadMetaFile reads the given meta from <dir>/shipper.json.
func ReadMetaFile(dir string) (*Meta, error) {
	b, err := os.ReadFile(filepath.Join(dir, filepath.Clean(MetaFilename)))
	if err != nil {
		return nil, err
	}
	var m Meta

	if err := json.Unmarshal(b, &m); err != nil {
		return nil, err
	}
	if m.Version != MetaVersion1 {
		return nil, errors.Errorf("unexpected meta file version %d", m.Version)
	}

	return &m, nil
}

func renameFile(logger log.Logger, from, to string) error {
	if err := os.RemoveAll(to); err != nil {
		return err
	}
	if err := os.Rename(from, to); err != nil {
		return err
	}

	// The file was renamed; fsync the parent directory to persist the rename.
	pdir, err := fileutil.OpenDir(filepath.Dir(to))
	if err != nil {
		return err
	}

	if err = fileutil.Fdatasync(pdir); err != nil {
		runutil.CloseWithLogOnErr(logger, pdir, "rename file dir close")
		return err
	}
	return pdir.Close()
}
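// Illustrative example (not from the original file): with MetaVersion1, the
// shipper.json written by WriteMetaFile looks roughly as follows; the ULID is
// a made-up placeholder:
//
//	{
//		"version": 1,
//		"uploaded": [
//			"01ARZ3NDEKTSV4RRFFQ69G5FAV"
//		]
//	}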