github.com/thanos-io/thanos@v0.32.5/cmd/thanos/downsample.go (about) 1 // Copyright (c) The Thanos Authors. 2 // Licensed under the Apache License 2.0. 3 4 package main 5 6 import ( 7 "context" 8 "os" 9 "path/filepath" 10 "sort" 11 "sync" 12 "time" 13 14 extflag "github.com/efficientgo/tools/extkingpin" 15 "github.com/go-kit/log" 16 "github.com/go-kit/log/level" 17 "github.com/oklog/run" 18 "github.com/oklog/ulid" 19 "github.com/pkg/errors" 20 "github.com/prometheus/client_golang/prometheus" 21 "github.com/prometheus/client_golang/prometheus/promauto" 22 "github.com/prometheus/prometheus/tsdb" 23 "github.com/prometheus/prometheus/tsdb/chunkenc" 24 25 "github.com/thanos-io/objstore" 26 "github.com/thanos-io/objstore/client" 27 objstoretracing "github.com/thanos-io/objstore/tracing/opentracing" 28 29 "github.com/thanos-io/thanos/pkg/block" 30 "github.com/thanos-io/thanos/pkg/block/metadata" 31 "github.com/thanos-io/thanos/pkg/compact/downsample" 32 "github.com/thanos-io/thanos/pkg/component" 33 "github.com/thanos-io/thanos/pkg/errutil" 34 "github.com/thanos-io/thanos/pkg/extprom" 35 "github.com/thanos-io/thanos/pkg/prober" 36 "github.com/thanos-io/thanos/pkg/runutil" 37 httpserver "github.com/thanos-io/thanos/pkg/server/http" 38 ) 39 40 type DownsampleMetrics struct { 41 downsamples *prometheus.CounterVec 42 downsampleFailures *prometheus.CounterVec 43 downsampleDuration *prometheus.HistogramVec 44 } 45 46 func newDownsampleMetrics(reg *prometheus.Registry) *DownsampleMetrics { 47 m := new(DownsampleMetrics) 48 49 m.downsamples = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ 50 Name: "thanos_compact_downsample_total", 51 Help: "Total number of downsampling attempts.", 52 }, []string{"group"}) 53 m.downsampleFailures = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ 54 Name: "thanos_compact_downsample_failures_total", 55 Help: "Total number of failed downsampling attempts.", 56 }, []string{"group"}) 57 m.downsampleDuration = promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{ 58 Name: "thanos_compact_downsample_duration_seconds", 59 Help: "Duration of downsample runs", 60 Buckets: []float64{60, 300, 900, 1800, 3600, 7200, 14400}, // 1m, 5m, 15m, 30m, 60m, 120m, 240m 61 }, []string{"group"}) 62 63 return m 64 } 65 66 func RunDownsample( 67 g *run.Group, 68 logger log.Logger, 69 reg *prometheus.Registry, 70 httpBindAddr string, 71 httpTLSConfig string, 72 httpGracePeriod time.Duration, 73 dataDir string, 74 waitInterval time.Duration, 75 downsampleConcurrency int, 76 blockFilesConcurrency int, 77 objStoreConfig *extflag.PathOrContent, 78 comp component.Component, 79 hashFunc metadata.HashFunc, 80 ) error { 81 confContentYaml, err := objStoreConfig.Content() 82 if err != nil { 83 return err 84 } 85 86 bkt, err := client.NewBucket(logger, confContentYaml, component.Downsample.String()) 87 if err != nil { 88 return err 89 } 90 insBkt := objstoretracing.WrapWithTraces(objstore.WrapWithMetrics(bkt, extprom.WrapRegistererWithPrefix("thanos_", reg), bkt.Name())) 91 92 // While fetching blocks, filter out blocks that were marked for no downsample. 93 metaFetcher, err := block.NewMetaFetcher(logger, block.FetcherConcurrency, insBkt, "", extprom.WrapRegistererWithPrefix("thanos_", reg), []block.MetadataFilter{ 94 block.NewDeduplicateFilter(block.FetcherConcurrency), 95 downsample.NewGatherNoDownsampleMarkFilter(logger, insBkt), 96 }) 97 if err != nil { 98 return errors.Wrap(err, "create meta fetcher") 99 } 100 101 // Ensure we close up everything properly. 102 defer func() { 103 if err != nil { 104 runutil.CloseWithLogOnErr(logger, insBkt, "bucket client") 105 } 106 }() 107 108 httpProbe := prober.NewHTTP() 109 statusProber := prober.Combine( 110 httpProbe, 111 prober.NewInstrumentation(comp, logger, extprom.WrapRegistererWithPrefix("thanos_", reg)), 112 ) 113 114 metrics := newDownsampleMetrics(reg) 115 // Start cycle of syncing blocks from the bucket and garbage collecting the bucket. 116 { 117 ctx, cancel := context.WithCancel(context.Background()) 118 119 g.Add(func() error { 120 defer runutil.CloseWithLogOnErr(logger, insBkt, "bucket client") 121 statusProber.Ready() 122 123 return runutil.Repeat(waitInterval, ctx.Done(), func() error { 124 level.Info(logger).Log("msg", "start first pass of downsampling") 125 metas, _, err := metaFetcher.Fetch(ctx) 126 if err != nil { 127 return errors.Wrap(err, "sync before first pass of downsampling") 128 } 129 130 for _, meta := range metas { 131 groupKey := meta.Thanos.GroupKey() 132 metrics.downsamples.WithLabelValues(groupKey) 133 metrics.downsampleFailures.WithLabelValues(groupKey) 134 } 135 if err := downsampleBucket(ctx, logger, metrics, insBkt, metas, dataDir, downsampleConcurrency, blockFilesConcurrency, hashFunc, false); err != nil { 136 return errors.Wrap(err, "downsampling failed") 137 } 138 139 level.Info(logger).Log("msg", "start second pass of downsampling") 140 metas, _, err = metaFetcher.Fetch(ctx) 141 if err != nil { 142 return errors.Wrap(err, "sync before second pass of downsampling") 143 } 144 if err := downsampleBucket(ctx, logger, metrics, insBkt, metas, dataDir, downsampleConcurrency, blockFilesConcurrency, hashFunc, false); err != nil { 145 return errors.Wrap(err, "downsampling failed") 146 } 147 return nil 148 }) 149 }, func(error) { 150 cancel() 151 }) 152 } 153 154 srv := httpserver.New(logger, reg, comp, httpProbe, 155 httpserver.WithListen(httpBindAddr), 156 httpserver.WithGracePeriod(httpGracePeriod), 157 httpserver.WithTLSConfig(httpTLSConfig), 158 ) 159 160 g.Add(func() error { 161 statusProber.Healthy() 162 163 return srv.ListenAndServe() 164 }, func(err error) { 165 statusProber.NotReady(err) 166 defer statusProber.NotHealthy(err) 167 168 srv.Shutdown(err) 169 }) 170 171 level.Info(logger).Log("msg", "starting downsample node") 172 return nil 173 } 174 175 func downsampleBucket( 176 ctx context.Context, 177 logger log.Logger, 178 metrics *DownsampleMetrics, 179 bkt objstore.Bucket, 180 metas map[ulid.ULID]*metadata.Meta, 181 dir string, 182 downsampleConcurrency int, 183 blockFilesConcurrency int, 184 hashFunc metadata.HashFunc, 185 acceptMalformedIndex bool, 186 ) (rerr error) { 187 if err := os.MkdirAll(dir, 0750); err != nil { 188 return errors.Wrap(err, "create dir") 189 } 190 191 defer func() { 192 // Leave the downsample directory for inspection if it is a halt error 193 // or if it is not then so that possibly we would not have to download everything again. 194 if rerr != nil { 195 return 196 } 197 if err := os.RemoveAll(dir); err != nil { 198 level.Error(logger).Log("msg", "failed to remove downsample cache directory", "path", dir, "err", err) 199 } 200 }() 201 202 // mapping from a hash over all source IDs to blocks. We don't need to downsample a block 203 // if a downsampled version with the same hash already exists. 204 sources5m := map[ulid.ULID]struct{}{} 205 sources1h := map[ulid.ULID]struct{}{} 206 207 for _, m := range metas { 208 switch m.Thanos.Downsample.Resolution { 209 case downsample.ResLevel0: 210 continue 211 case downsample.ResLevel1: 212 for _, id := range m.Compaction.Sources { 213 sources5m[id] = struct{}{} 214 } 215 case downsample.ResLevel2: 216 for _, id := range m.Compaction.Sources { 217 sources1h[id] = struct{}{} 218 } 219 default: 220 return errors.Errorf("unexpected downsampling resolution %d", m.Thanos.Downsample.Resolution) 221 } 222 } 223 224 ignoreDirs := []string{} 225 for ulid := range metas { 226 ignoreDirs = append(ignoreDirs, ulid.String()) 227 } 228 229 if err := runutil.DeleteAll(dir, ignoreDirs...); err != nil { 230 level.Warn(logger).Log("msg", "failed deleting potentially outdated directories/files, some disk space usage might have leaked. Continuing", "err", err, "dir", dir) 231 } 232 233 metasULIDS := make([]ulid.ULID, 0, len(metas)) 234 for k := range metas { 235 metasULIDS = append(metasULIDS, k) 236 } 237 sort.Slice(metasULIDS, func(i, j int) bool { 238 return metasULIDS[i].Compare(metasULIDS[j]) < 0 239 }) 240 241 var ( 242 wg sync.WaitGroup 243 metaCh = make(chan *metadata.Meta) 244 downsampleErrs errutil.MultiError 245 errCh = make(chan error, downsampleConcurrency) 246 workerCtx, workerCancel = context.WithCancel(ctx) 247 ) 248 249 defer workerCancel() 250 251 level.Debug(logger).Log("msg", "downsampling bucket", "concurrency", downsampleConcurrency) 252 for i := 0; i < downsampleConcurrency; i++ { 253 wg.Add(1) 254 go func() { 255 defer wg.Done() 256 for m := range metaCh { 257 resolution := downsample.ResLevel1 258 errMsg := "downsampling to 5 min" 259 if m.Thanos.Downsample.Resolution == downsample.ResLevel1 { 260 resolution = downsample.ResLevel2 261 errMsg = "downsampling to 60 min" 262 } 263 if err := processDownsampling(workerCtx, logger, bkt, m, dir, resolution, hashFunc, metrics, acceptMalformedIndex, blockFilesConcurrency); err != nil { 264 metrics.downsampleFailures.WithLabelValues(m.Thanos.GroupKey()).Inc() 265 errCh <- errors.Wrap(err, errMsg) 266 267 } 268 metrics.downsamples.WithLabelValues(m.Thanos.GroupKey()).Inc() 269 } 270 }() 271 } 272 273 // Workers scheduled, distribute blocks. 274 metaSendLoop: 275 for _, mk := range metasULIDS { 276 m := metas[mk] 277 278 switch m.Thanos.Downsample.Resolution { 279 case downsample.ResLevel2: 280 continue 281 282 case downsample.ResLevel0: 283 missing := false 284 for _, id := range m.Compaction.Sources { 285 if _, ok := sources5m[id]; !ok { 286 missing = true 287 break 288 } 289 } 290 if !missing { 291 continue 292 } 293 // Only downsample blocks once we are sure to get roughly 2 chunks out of it. 294 // NOTE(fabxc): this must match with at which block size the compactor creates downsampled 295 // blocks. Otherwise we may never downsample some data. 296 if m.MaxTime-m.MinTime < downsample.ResLevel1DownsampleRange { 297 continue 298 } 299 300 case downsample.ResLevel1: 301 missing := false 302 for _, id := range m.Compaction.Sources { 303 if _, ok := sources1h[id]; !ok { 304 missing = true 305 break 306 } 307 } 308 if !missing { 309 continue 310 } 311 // Only downsample blocks once we are sure to get roughly 2 chunks out of it. 312 // NOTE(fabxc): this must match with at which block size the compactor creates downsampled 313 // blocks. Otherwise we may never downsample some data. 314 if m.MaxTime-m.MinTime < downsample.ResLevel2DownsampleRange { 315 continue 316 } 317 } 318 319 select { 320 case <-workerCtx.Done(): 321 downsampleErrs.Add(workerCtx.Err()) 322 break metaSendLoop 323 case metaCh <- m: 324 case downsampleErr := <-errCh: 325 downsampleErrs.Add(downsampleErr) 326 break metaSendLoop 327 } 328 } 329 330 close(metaCh) 331 wg.Wait() 332 workerCancel() 333 close(errCh) 334 335 // Collect any other error reported by the workers. 336 for downsampleErr := range errCh { 337 downsampleErrs.Add(downsampleErr) 338 } 339 340 return downsampleErrs.Err() 341 } 342 343 func processDownsampling( 344 ctx context.Context, 345 logger log.Logger, 346 bkt objstore.Bucket, 347 m *metadata.Meta, 348 dir string, 349 resolution int64, 350 hashFunc metadata.HashFunc, 351 metrics *DownsampleMetrics, 352 acceptMalformedIndex bool, 353 blockFilesConcurrency int, 354 ) error { 355 begin := time.Now() 356 bdir := filepath.Join(dir, m.ULID.String()) 357 358 err := block.Download(ctx, logger, bkt, m.ULID, bdir, objstore.WithFetchConcurrency(blockFilesConcurrency)) 359 if err != nil { 360 return errors.Wrapf(err, "download block %s", m.ULID) 361 } 362 level.Info(logger).Log("msg", "downloaded block", "id", m.ULID, "duration", time.Since(begin), "duration_ms", time.Since(begin).Milliseconds()) 363 364 if err := block.VerifyIndex(logger, filepath.Join(bdir, block.IndexFilename), m.MinTime, m.MaxTime); err != nil && !acceptMalformedIndex { 365 return errors.Wrap(err, "input block index not valid") 366 } 367 368 begin = time.Now() 369 370 var pool chunkenc.Pool 371 if m.Thanos.Downsample.Resolution == 0 { 372 pool = chunkenc.NewPool() 373 } else { 374 pool = downsample.NewPool() 375 } 376 377 b, err := tsdb.OpenBlock(logger, bdir, pool) 378 if err != nil { 379 return errors.Wrapf(err, "open block %s", m.ULID) 380 } 381 defer runutil.CloseWithLogOnErr(log.With(logger, "outcome", "potential left mmap file handlers left"), b, "tsdb reader") 382 383 id, err := downsample.Downsample(logger, m, b, dir, resolution) 384 if err != nil { 385 return errors.Wrapf(err, "downsample block %s to window %d", m.ULID, resolution) 386 } 387 resdir := filepath.Join(dir, id.String()) 388 389 downsampleDuration := time.Since(begin) 390 level.Info(logger).Log("msg", "downsampled block", 391 "from", m.ULID, "to", id, "duration", downsampleDuration, "duration_ms", downsampleDuration.Milliseconds()) 392 metrics.downsampleDuration.WithLabelValues(m.Thanos.GroupKey()).Observe(downsampleDuration.Seconds()) 393 394 stats, err := block.GatherIndexHealthStats(logger, filepath.Join(resdir, block.IndexFilename), m.MinTime, m.MaxTime) 395 if err == nil { 396 err = stats.AnyErr() 397 } 398 if err != nil && !acceptMalformedIndex { 399 return errors.Wrap(err, "output block index not valid") 400 } 401 402 meta, err := metadata.ReadFromDir(resdir) 403 if err != nil { 404 return errors.Wrap(err, "read meta") 405 } 406 407 if stats.ChunkMaxSize > 0 { 408 meta.Thanos.IndexStats.ChunkMaxSize = stats.ChunkMaxSize 409 } 410 if stats.SeriesMaxSize > 0 { 411 meta.Thanos.IndexStats.SeriesMaxSize = stats.SeriesMaxSize 412 } 413 if err := meta.WriteToDir(logger, resdir); err != nil { 414 return errors.Wrap(err, "write meta") 415 } 416 417 begin = time.Now() 418 419 err = block.Upload(ctx, logger, bkt, resdir, hashFunc) 420 if err != nil { 421 return errors.Wrapf(err, "upload downsampled block %s", id) 422 } 423 424 level.Info(logger).Log("msg", "uploaded block", "id", id, "duration", time.Since(begin), "duration_ms", time.Since(begin).Milliseconds()) 425 426 // It is not harmful if these fails. 427 if err := os.RemoveAll(bdir); err != nil { 428 level.Warn(logger).Log("msg", "failed to clean directory", "dir", bdir, "err", err) 429 } 430 if err := os.RemoveAll(resdir); err != nil { 431 level.Warn(logger).Log("msg", "failed to clean directory", "resdir", bdir, "err", err) 432 } 433 434 return nil 435 }