github.com/thanos-io/thanos@v0.32.5/cmd/thanos/tools_bucket.go (about) 1 // Copyright (c) The Thanos Authors. 2 // Licensed under the Apache License 2.0. 3 4 package main 5 6 import ( 7 "context" 8 "crypto/rand" 9 "encoding/csv" 10 "encoding/json" 11 "fmt" 12 "io" 13 "net/http" 14 "os" 15 "path/filepath" 16 "sort" 17 "strconv" 18 "strings" 19 "text/template" 20 "time" 21 22 extflag "github.com/efficientgo/tools/extkingpin" 23 "github.com/go-kit/log" 24 "github.com/go-kit/log/level" 25 "github.com/oklog/run" 26 "github.com/oklog/ulid" 27 "github.com/olekukonko/tablewriter" 28 "github.com/opentracing/opentracing-go" 29 "github.com/pkg/errors" 30 "github.com/prometheus/client_golang/prometheus" 31 "github.com/prometheus/client_golang/prometheus/promauto" 32 prommodel "github.com/prometheus/common/model" 33 "github.com/prometheus/common/route" 34 "github.com/prometheus/prometheus/model/labels" 35 "github.com/prometheus/prometheus/model/relabel" 36 "github.com/prometheus/prometheus/tsdb" 37 "github.com/prometheus/prometheus/tsdb/chunkenc" 38 "golang.org/x/text/language" 39 "golang.org/x/text/message" 40 "gopkg.in/yaml.v3" 41 42 "github.com/thanos-io/objstore" 43 "github.com/thanos-io/objstore/client" 44 objstoretracing "github.com/thanos-io/objstore/tracing/opentracing" 45 46 v1 "github.com/thanos-io/thanos/pkg/api/blocks" 47 "github.com/thanos-io/thanos/pkg/block" 48 "github.com/thanos-io/thanos/pkg/block/metadata" 49 "github.com/thanos-io/thanos/pkg/compact" 50 "github.com/thanos-io/thanos/pkg/compact/downsample" 51 "github.com/thanos-io/thanos/pkg/compactv2" 52 "github.com/thanos-io/thanos/pkg/component" 53 "github.com/thanos-io/thanos/pkg/extkingpin" 54 "github.com/thanos-io/thanos/pkg/extprom" 55 extpromhttp "github.com/thanos-io/thanos/pkg/extprom/http" 56 "github.com/thanos-io/thanos/pkg/logging" 57 "github.com/thanos-io/thanos/pkg/model" 58 "github.com/thanos-io/thanos/pkg/prober" 59 "github.com/thanos-io/thanos/pkg/replicate" 60 
"github.com/thanos-io/thanos/pkg/runutil" 61 httpserver "github.com/thanos-io/thanos/pkg/server/http" 62 "github.com/thanos-io/thanos/pkg/store" 63 "github.com/thanos-io/thanos/pkg/ui" 64 "github.com/thanos-io/thanos/pkg/verifier" 65 ) 66 67 const extpromPrefix = "thanos_bucket_" 68 69 var ( 70 issuesVerifiersRegistry = verifier.Registry{ 71 Verifiers: []verifier.Verifier{verifier.OverlappedBlocksIssue{}}, 72 VerifierRepairers: []verifier.VerifierRepairer{ 73 verifier.IndexKnownIssues{}, 74 verifier.DuplicatedCompactionBlocks{}, 75 }, 76 } 77 inspectColumns = []string{"ULID", "FROM", "UNTIL", "RANGE", "UNTIL-DOWN", "#SERIES", "#SAMPLES", "#CHUNKS", "COMP-LEVEL", "COMP-FAILED", "LABELS", "RESOLUTION", "SOURCE"} 78 outputTypes = []string{"table", "tsv", "csv"} 79 ) 80 81 type outputType string 82 83 const ( 84 TABLE outputType = "table" 85 CSV outputType = "csv" 86 TSV outputType = "tsv" 87 ) 88 89 type bucketRewriteConfig struct { 90 blockIDs []string 91 tmpDir string 92 dryRun bool 93 promBlocks bool 94 deleteBlocks bool 95 } 96 97 type bucketInspectConfig struct { 98 selector []string 99 sortBy []string 100 timeout time.Duration 101 } 102 103 type bucketVerifyConfig struct { 104 repair bool 105 ids []string 106 issuesToVerify []string 107 } 108 109 type bucketLsConfig struct { 110 output string 111 excludeDelete bool 112 } 113 114 type bucketWebConfig struct { 115 webRoutePrefix string 116 webExternalPrefix string 117 webPrefixHeaderName string 118 webDisableCORS bool 119 interval time.Duration 120 label string 121 timeout time.Duration 122 } 123 124 type bucketReplicateConfig struct { 125 resolutions []time.Duration 126 compactMin int 127 compactMax int 128 compactions []int 129 matcherStrs string 130 singleRun bool 131 } 132 133 type bucketDownsampleConfig struct { 134 waitInterval time.Duration 135 downsampleConcurrency int 136 blockFilesConcurrency int 137 dataDir string 138 hashFunc string 139 } 140 141 type bucketCleanupConfig struct { 142 consistencyDelay 
time.Duration 143 blockSyncConcurrency int 144 deleteDelay time.Duration 145 } 146 147 type bucketRetentionConfig struct { 148 consistencyDelay time.Duration 149 blockSyncConcurrency int 150 deleteDelay time.Duration 151 } 152 153 type bucketMarkBlockConfig struct { 154 details string 155 marker string 156 blockIDs []string 157 removeMarker bool 158 } 159 160 func (tbc *bucketVerifyConfig) registerBucketVerifyFlag(cmd extkingpin.FlagClause) *bucketVerifyConfig { 161 cmd.Flag("repair", "Attempt to repair blocks for which issues were detected"). 162 Short('r').Default("false").BoolVar(&tbc.repair) 163 164 cmd.Flag("issues", fmt.Sprintf("Issues to verify (and optionally repair). "+ 165 "Possible issue to verify, without repair: %v; Possible issue to verify and repair: %v", 166 issuesVerifiersRegistry.VerifiersIDs(), issuesVerifiersRegistry.VerifierRepairersIDs())). 167 Short('i').Default(verifier.IndexKnownIssues{}.IssueID(), verifier.OverlappedBlocksIssue{}.IssueID()).StringsVar(&tbc.issuesToVerify) 168 169 cmd.Flag("id", "Block IDs to verify (and optionally repair) only. "+ 170 "If none is specified, all blocks will be verified. Repeated field").StringsVar(&tbc.ids) 171 return tbc 172 } 173 174 func (tbc *bucketLsConfig) registerBucketLsFlag(cmd extkingpin.FlagClause) *bucketLsConfig { 175 cmd.Flag("output", "Optional format in which to print each block's information. Options are 'json', 'wide' or a custom template."). 176 Short('o').Default("").StringVar(&tbc.output) 177 cmd.Flag("exclude-delete", "Exclude blocks marked for deletion."). 178 Default("false").BoolVar(&tbc.excludeDelete) 179 return tbc 180 } 181 182 func (tbc *bucketInspectConfig) registerBucketInspectFlag(cmd extkingpin.FlagClause) *bucketInspectConfig { 183 cmd.Flag("selector", "Selects blocks based on label, e.g. '-l key1=\\\"value1\\\" -l key2=\\\"value2\\\"'. All key value pairs must match.").Short('l'). 
184 PlaceHolder("<name>=\\\"<value>\\\"").StringsVar(&tbc.selector) 185 cmd.Flag("sort-by", "Sort by columns. It's also possible to sort by multiple columns, e.g. '--sort-by FROM --sort-by UNTIL'. I.e., if the 'FROM' value is equal the rows are then further sorted by the 'UNTIL' value."). 186 Default("FROM", "UNTIL").EnumsVar(&tbc.sortBy, inspectColumns...) 187 cmd.Flag("timeout", "Timeout to download metadata from remote storage").Default("5m").DurationVar(&tbc.timeout) 188 189 return tbc 190 } 191 192 func (tbc *bucketWebConfig) registerBucketWebFlag(cmd extkingpin.FlagClause) *bucketWebConfig { 193 cmd.Flag("web.route-prefix", "Prefix for API and UI endpoints. This allows thanos UI to be served on a sub-path. Defaults to the value of --web.external-prefix. This option is analogous to --web.route-prefix of Prometheus.").Default("").StringVar(&tbc.webRoutePrefix) 194 195 cmd.Flag("web.external-prefix", "Static prefix for all HTML links and redirect URLs in the bucket web UI interface. Actual endpoints are still served on / or the web.route-prefix. This allows thanos bucket web UI to be served behind a reverse proxy that strips a URL sub-path.").Default("").StringVar(&tbc.webExternalPrefix) 196 197 cmd.Flag("web.prefix-header", "Name of HTTP request header used for dynamic prefixing of UI links and redirects. This option is ignored if web.external-prefix argument is set. Security risk: enable this option only if a reverse proxy in front of thanos is resetting the header. The --web.prefix-header=X-Forwarded-Prefix option can be useful, for example, if Thanos UI is served via Traefik reverse proxy with PathPrefixStrip option enabled, which sends the stripped prefix value in X-Forwarded-Prefix header. This allows thanos UI to be served on a sub-path.").Default("").StringVar(&tbc.webPrefixHeaderName) 198 199 cmd.Flag("web.disable-cors", "Whether to disable CORS headers to be set by Thanos. 
By default Thanos sets CORS headers to be allowed by all.").Default("false").BoolVar(&tbc.webDisableCORS) 200 201 cmd.Flag("refresh", "Refresh interval to download metadata from remote storage").Default("30m").DurationVar(&tbc.interval) 202 203 cmd.Flag("timeout", "Timeout to download metadata from remote storage").Default("5m").DurationVar(&tbc.timeout) 204 205 cmd.Flag("label", "External block label to use as group title").StringVar(&tbc.label) 206 return tbc 207 } 208 209 func (tbc *bucketReplicateConfig) registerBucketReplicateFlag(cmd extkingpin.FlagClause) *bucketReplicateConfig { 210 cmd.Flag("resolution", "Only blocks with these resolutions will be replicated. Repeated flag.").Default("0s", "5m", "1h").HintAction(listResLevel).DurationListVar(&tbc.resolutions) 211 212 cmd.Flag("compaction-min", "Only blocks with at least this compaction level will be replicated.").Default("1").IntVar(&tbc.compactMin) 213 214 cmd.Flag("compaction-max", "Only blocks up to a maximum of this compaction level will be replicated.").Default("4").IntVar(&tbc.compactMax) 215 216 cmd.Flag("compaction", "Only blocks with these compaction levels will be replicated. Repeated flag. Overrides compaction-min and compaction-max if set.").Default().IntsVar(&tbc.compactions) 217 218 cmd.Flag("matcher", "blocks whose external labels match this matcher will be replicated. 
All Prometheus matchers are supported, including =, !=, =~ and !~.").StringVar(&tbc.matcherStrs) 219 220 cmd.Flag("single-run", "Run replication only one time, then exit.").Default("false").BoolVar(&tbc.singleRun) 221 222 return tbc 223 } 224 225 func (tbc *bucketRewriteConfig) registerBucketRewriteFlag(cmd extkingpin.FlagClause) *bucketRewriteConfig { 226 cmd.Flag("id", "ID (ULID) of the blocks for rewrite (repeated flag).").Required().StringsVar(&tbc.blockIDs) 227 cmd.Flag("tmp.dir", "Working directory for temporary files").Default(filepath.Join(os.TempDir(), "thanos-rewrite")).StringVar(&tbc.tmpDir) 228 cmd.Flag("dry-run", "Prints the series changes instead of doing them. Defaults to true, for user to double check. (: Pass --no-dry-run to skip this.").Default("true").BoolVar(&tbc.dryRun) 229 cmd.Flag("prom-blocks", "If specified, we assume the blocks to be uploaded are only used with Prometheus so we don't check external labels in this case.").Default("false").BoolVar(&tbc.promBlocks) 230 cmd.Flag("delete-blocks", "Whether to delete the original blocks after rewriting blocks successfully. Available in non dry-run mode only.").Default("false").BoolVar(&tbc.deleteBlocks) 231 232 return tbc 233 } 234 235 func (tbc *bucketDownsampleConfig) registerBucketDownsampleFlag(cmd extkingpin.FlagClause) *bucketDownsampleConfig { 236 cmd.Flag("wait-interval", "Wait interval between downsample runs."). 237 Default("5m").DurationVar(&tbc.waitInterval) 238 cmd.Flag("downsample.concurrency", "Number of goroutines to use when downsampling blocks."). 239 Default("1").IntVar(&tbc.downsampleConcurrency) 240 cmd.Flag("block-files-concurrency", "Number of goroutines to use when fetching/uploading block files from object storage."). 241 Default("1").IntVar(&tbc.blockFilesConcurrency) 242 cmd.Flag("data-dir", "Data directory in which to cache blocks and process downsamplings."). 
243 Default("./data").StringVar(&tbc.dataDir) 244 cmd.Flag("hash-func", "Specify which hash function to use when calculating the hashes of produced files. If no function has been specified, it does not happen. This permits avoiding downloading some files twice albeit at some performance cost. Possible values are: \"\", \"SHA256\"."). 245 Default("").EnumVar(&tbc.hashFunc, "SHA256", "") 246 247 return tbc 248 } 249 250 func (tbc *bucketMarkBlockConfig) registerBucketMarkBlockFlag(cmd extkingpin.FlagClause) *bucketMarkBlockConfig { 251 cmd.Flag("id", "ID (ULID) of the blocks to be marked for deletion (repeated flag)").Required().StringsVar(&tbc.blockIDs) 252 cmd.Flag("marker", "Marker to be put.").Required().EnumVar(&tbc.marker, metadata.DeletionMarkFilename, metadata.NoCompactMarkFilename, metadata.NoDownsampleMarkFilename) 253 cmd.Flag("details", "Human readable details to be put into marker.").StringVar(&tbc.details) 254 cmd.Flag("remove", "Remove the marker.").Default("false").BoolVar(&tbc.removeMarker) 255 return tbc 256 } 257 258 func (tbc *bucketCleanupConfig) registerBucketCleanupFlag(cmd extkingpin.FlagClause) *bucketCleanupConfig { 259 cmd.Flag("delete-delay", "Time before a block marked for deletion is deleted from bucket.").Default("48h").DurationVar(&tbc.deleteDelay) 260 cmd.Flag("consistency-delay", fmt.Sprintf("Minimum age of fresh (non-compacted) blocks before they are being processed. Malformed blocks older than the maximum of consistency-delay and %v will be removed.", compact.PartialUploadThresholdAge)). 261 Default("30m").DurationVar(&tbc.consistencyDelay) 262 cmd.Flag("block-sync-concurrency", "Number of goroutines to use when syncing block metadata from object storage."). 
263 Default("20").IntVar(&tbc.blockSyncConcurrency) 264 return tbc 265 } 266 267 func (tbc *bucketRetentionConfig) registerBucketRetentionFlag(cmd extkingpin.FlagClause) *bucketRetentionConfig { 268 cmd.Flag("delete-delay", "Time before a block marked for deletion is deleted from bucket.").Default("48h").DurationVar(&tbc.deleteDelay) 269 cmd.Flag("consistency-delay", fmt.Sprintf("Minimum age of fresh (non-compacted) blocks before they are being processed. Malformed blocks older than the maximum of consistency-delay and %v will be removed.", compact.PartialUploadThresholdAge)). 270 Default("30m").DurationVar(&tbc.consistencyDelay) 271 cmd.Flag("block-sync-concurrency", "Number of goroutines to use when syncing block metadata from object storage."). 272 Default("20").IntVar(&tbc.blockSyncConcurrency) 273 274 return tbc 275 } 276 277 func registerBucket(app extkingpin.AppClause) { 278 cmd := app.Command("bucket", "Bucket utility commands") 279 280 objStoreConfig := extkingpin.RegisterCommonObjStoreFlags(cmd, "", true) 281 registerBucketVerify(cmd, objStoreConfig) 282 registerBucketLs(cmd, objStoreConfig) 283 registerBucketInspect(cmd, objStoreConfig) 284 registerBucketWeb(cmd, objStoreConfig) 285 registerBucketReplicate(cmd, objStoreConfig) 286 registerBucketDownsample(cmd, objStoreConfig) 287 registerBucketCleanup(cmd, objStoreConfig) 288 registerBucketMarkBlock(cmd, objStoreConfig) 289 registerBucketRewrite(cmd, objStoreConfig) 290 registerBucketRetention(cmd, objStoreConfig) 291 } 292 293 func registerBucketVerify(app extkingpin.AppClause, objStoreConfig *extflag.PathOrContent) { 294 cmd := app.Command("verify", "Verify all blocks in the bucket against specified issues. 
NOTE: Depending on issue this might take time and will need downloading all specified blocks to disk.") 295 objStoreBackupConfig := extkingpin.RegisterCommonObjStoreFlags(cmd, "-backup", false, "Used for repair logic to backup blocks before removal.") 296 297 tbc := &bucketVerifyConfig{} 298 tbc.registerBucketVerifyFlag(cmd) 299 300 deleteDelay := extkingpin.ModelDuration(cmd.Flag("delete-delay", "Duration after which blocks marked for deletion would be deleted permanently from source bucket by compactor component. "+ 301 "If delete-delay is non zero, blocks will be marked for deletion and compactor component is required to delete blocks from source bucket. "+ 302 "If delete-delay is 0, blocks will be deleted straight away. Use this if you want to get rid of or move the block immediately. "+ 303 "Note that deleting blocks immediately can cause query failures, if store gateway still has the block loaded, "+ 304 "or compactor is ignoring the deletion because it's compacting the block at the same time."). 305 Default("0s")) 306 cmd.Setup(func(g *run.Group, logger log.Logger, reg *prometheus.Registry, _ opentracing.Tracer, _ <-chan struct{}, _ bool) error { 307 confContentYaml, err := objStoreConfig.Content() 308 if err != nil { 309 return err 310 } 311 312 bkt, err := client.NewBucket(logger, confContentYaml, component.Bucket.String()) 313 if err != nil { 314 return err 315 } 316 insBkt := objstoretracing.WrapWithTraces(objstore.WrapWithMetrics(bkt, extprom.WrapRegistererWithPrefix("thanos_", reg), bkt.Name())) 317 defer runutil.CloseWithLogOnErr(logger, insBkt, "bucket client") 318 319 backupconfContentYaml, err := objStoreBackupConfig.Content() 320 if err != nil { 321 return err 322 } 323 324 var backupBkt objstore.Bucket 325 if len(backupconfContentYaml) == 0 { 326 if tbc.repair { 327 return errors.New("repair is specified, so backup client is required") 328 } 329 } else { 330 // nil Prometheus registerer: don't create conflicting metrics. 
331 backupBkt, err = client.NewBucket(logger, backupconfContentYaml, component.Bucket.String()) 332 if err != nil { 333 return err 334 } 335 insBkt = objstoretracing.WrapWithTraces(objstore.WrapWithMetrics(bkt, nil, bkt.Name())) 336 337 defer runutil.CloseWithLogOnErr(logger, backupBkt, "backup bucket client") 338 } 339 340 // Dummy actor to immediately kill the group after the run function returns. 341 g.Add(func() error { return nil }, func(error) {}) 342 343 r, err := issuesVerifiersRegistry.SubstractByIDs(tbc.issuesToVerify, tbc.repair) 344 if err != nil { 345 return err 346 } 347 348 // We ignore any block that has the deletion marker file. 349 filters := []block.MetadataFilter{block.NewIgnoreDeletionMarkFilter(logger, insBkt, 0, block.FetcherConcurrency)} 350 fetcher, err := block.NewMetaFetcher(logger, block.FetcherConcurrency, insBkt, "", extprom.WrapRegistererWithPrefix(extpromPrefix, reg), filters) 351 if err != nil { 352 return err 353 } 354 355 var idMatcher func(ulid.ULID) bool = nil 356 if len(tbc.ids) > 0 { 357 idsMap := map[string]struct{}{} 358 for _, bid := range tbc.ids { 359 id, err := ulid.Parse(bid) 360 if err != nil { 361 return errors.Wrap(err, "invalid ULID found in --id flag") 362 } 363 idsMap[id.String()] = struct{}{} 364 } 365 366 idMatcher = func(id ulid.ULID) bool { 367 if _, ok := idsMap[id.String()]; !ok { 368 return false 369 } 370 return true 371 } 372 } 373 374 v := verifier.NewManager(reg, logger, insBkt, backupBkt, fetcher, time.Duration(*deleteDelay), r) 375 if tbc.repair { 376 return v.VerifyAndRepair(context.Background(), idMatcher) 377 } 378 379 return v.Verify(context.Background(), idMatcher) 380 }) 381 } 382 383 func registerBucketLs(app extkingpin.AppClause, objStoreConfig *extflag.PathOrContent) { 384 cmd := app.Command("ls", "List all blocks in the bucket.") 385 386 tbc := &bucketLsConfig{} 387 tbc.registerBucketLsFlag(cmd) 388 389 cmd.Setup(func(g *run.Group, logger log.Logger, reg *prometheus.Registry, _ 
opentracing.Tracer, _ <-chan struct{}, _ bool) error { 390 confContentYaml, err := objStoreConfig.Content() 391 if err != nil { 392 return err 393 } 394 395 bkt, err := client.NewBucket(logger, confContentYaml, component.Bucket.String()) 396 if err != nil { 397 return err 398 } 399 insBkt := objstoretracing.WrapWithTraces(objstore.WrapWithMetrics(bkt, extprom.WrapRegistererWithPrefix("thanos_", reg), bkt.Name())) 400 401 var filters []block.MetadataFilter 402 403 if tbc.excludeDelete { 404 ignoreDeletionMarkFilter := block.NewIgnoreDeletionMarkFilter(logger, insBkt, 0, block.FetcherConcurrency) 405 filters = append(filters, ignoreDeletionMarkFilter) 406 } 407 fetcher, err := block.NewMetaFetcher(logger, block.FetcherConcurrency, insBkt, "", extprom.WrapRegistererWithPrefix(extpromPrefix, reg), filters) 408 if err != nil { 409 return err 410 } 411 412 // Dummy actor to immediately kill the group after the run function returns. 413 g.Add(func() error { return nil }, func(error) {}) 414 415 defer runutil.CloseWithLogOnErr(logger, insBkt, "bucket client") 416 417 ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) 418 defer cancel() 419 420 var ( 421 format = tbc.output 422 objects = 0 423 printBlock func(m *metadata.Meta) error 424 ) 425 426 switch format { 427 case "": 428 printBlock = func(m *metadata.Meta) error { 429 fmt.Fprintln(os.Stdout, m.ULID.String()) 430 return nil 431 } 432 case "wide": 433 printBlock = func(m *metadata.Meta) error { 434 minTime := time.Unix(m.MinTime/1000, 0) 435 maxTime := time.Unix(m.MaxTime/1000, 0) 436 437 if _, err = fmt.Fprintf(os.Stdout, "%s -- %s - %s Diff: %s, Compaction: %d, Downsample: %d, Source: %s\n", 438 m.ULID, minTime.Format(time.RFC3339), maxTime.Format(time.RFC3339), maxTime.Sub(minTime), 439 m.Compaction.Level, m.Thanos.Downsample.Resolution, m.Thanos.Source); err != nil { 440 return err 441 } 442 return nil 443 } 444 case "json": 445 enc := json.NewEncoder(os.Stdout) 446 enc.SetIndent("", "\t") 447 
448 printBlock = func(m *metadata.Meta) error { 449 return enc.Encode(&m) 450 } 451 default: 452 tmpl, err := template.New("").Parse(format) 453 if err != nil { 454 return errors.Wrap(err, "invalid template") 455 } 456 printBlock = func(m *metadata.Meta) error { 457 if err := tmpl.Execute(os.Stdout, &m); err != nil { 458 return errors.Wrap(err, "execute template") 459 } 460 fmt.Fprintln(os.Stdout, "") 461 return nil 462 } 463 } 464 465 metas, _, err := fetcher.Fetch(ctx) 466 if err != nil { 467 return err 468 } 469 470 for _, meta := range metas { 471 objects++ 472 if err := printBlock(meta); err != nil { 473 return errors.Wrap(err, "iter") 474 } 475 } 476 level.Info(logger).Log("msg", "ls done", "objects", objects) 477 return nil 478 }) 479 } 480 481 func registerBucketInspect(app extkingpin.AppClause, objStoreConfig *extflag.PathOrContent) { 482 cmd := app.Command("inspect", "Inspect all blocks in the bucket in detailed, table-like way.") 483 484 tbc := &bucketInspectConfig{} 485 tbc.registerBucketInspectFlag(cmd) 486 487 output := cmd.Flag("output", "Output format for result. Currently supports table, cvs, tsv.").Default("table").Enum(outputTypes...) 488 489 cmd.Setup(func(g *run.Group, logger log.Logger, reg *prometheus.Registry, _ opentracing.Tracer, _ <-chan struct{}, _ bool) error { 490 491 // Parse selector. 
492 selectorLabels, err := parseFlagLabels(tbc.selector) 493 if err != nil { 494 return errors.Wrap(err, "error parsing selector flag") 495 } 496 497 confContentYaml, err := objStoreConfig.Content() 498 if err != nil { 499 return err 500 } 501 502 bkt, err := client.NewBucket(logger, confContentYaml, component.Bucket.String()) 503 if err != nil { 504 return err 505 } 506 insBkt := objstoretracing.WrapWithTraces(objstore.WrapWithMetrics(bkt, extprom.WrapRegistererWithPrefix("thanos_", reg), bkt.Name())) 507 508 fetcher, err := block.NewMetaFetcher(logger, block.FetcherConcurrency, insBkt, "", extprom.WrapRegistererWithPrefix(extpromPrefix, reg), nil) 509 if err != nil { 510 return err 511 } 512 513 // Dummy actor to immediately kill the group after the run function returns. 514 g.Add(func() error { return nil }, func(error) {}) 515 516 defer runutil.CloseWithLogOnErr(logger, insBkt, "bucket client") 517 518 ctx, cancel := context.WithTimeout(context.Background(), tbc.timeout) 519 defer cancel() 520 521 // Getting Metas. 522 metas, _, err := fetcher.Fetch(ctx) 523 if err != nil { 524 return err 525 } 526 527 blockMetas := make([]*metadata.Meta, 0, len(metas)) 528 for _, meta := range metas { 529 blockMetas = append(blockMetas, meta) 530 } 531 532 var opPrinter tablePrinter 533 op := outputType(*output) 534 switch op { 535 case TABLE: 536 opPrinter = printTable 537 case TSV: 538 opPrinter = printTSV 539 case CSV: 540 opPrinter = printCSV 541 } 542 return printBlockData(blockMetas, selectorLabels, tbc.sortBy, opPrinter) 543 }) 544 } 545 546 // registerBucketWeb exposes a web interface for the state of remote store like `pprof web`. 
func registerBucketWeb(app extkingpin.AppClause, objStoreConfig *extflag.PathOrContent) {
	cmd := app.Command("web", "Web interface for remote storage bucket.")
	httpBindAddr, httpGracePeriod, httpTLSConfig := extkingpin.RegisterHTTPFlags(cmd)

	tbc := &bucketWebConfig{}
	tbc.registerBucketWebFlag(cmd)

	filterConf := &store.FilterConfig{}
	cmd.Flag("min-time", "Start of time range limit to serve. Thanos tool bucket web will serve only blocks, which happened later than this value. Option can be a constant time in RFC3339 format or time duration relative to current time, such as -1d or 2h45m. Valid duration units are ms, s, m, h, d, w, y.").
		Default("0000-01-01T00:00:00Z").SetValue(&filterConf.MinTime)
	cmd.Flag("max-time", "End of time range limit to serve. Thanos tool bucket web will serve only blocks, which happened earlier than this value. Option can be a constant time in RFC3339 format or time duration relative to current time, such as -1d or 2h45m. Valid duration units are ms, s, m, h, d, w, y.").
		Default("9999-12-31T23:59:59Z").SetValue(&filterConf.MaxTime)
	selectorRelabelConf := *extkingpin.RegisterSelectorRelabelFlags(cmd)

	cmd.Setup(func(g *run.Group, logger log.Logger, reg *prometheus.Registry, tracer opentracing.Tracer, _ <-chan struct{}, _ bool) error {
		comp := component.Bucket
		// Combined readiness/health probe served by the HTTP server below.
		httpProbe := prober.NewHTTP()
		statusProber := prober.Combine(
			httpProbe,
			prober.NewInstrumentation(comp, logger, extprom.WrapRegistererWithPrefix("thanos_", reg)),
		)

		srv := httpserver.New(logger, reg, comp, httpProbe,
			httpserver.WithListen(*httpBindAddr),
			httpserver.WithGracePeriod(time.Duration(*httpGracePeriod)),
			httpserver.WithTLSConfig(*httpTLSConfig),
		)

		// --web.route-prefix defaults to --web.external-prefix when unset.
		if tbc.webRoutePrefix == "" {
			tbc.webRoutePrefix = tbc.webExternalPrefix
		}

		if tbc.webRoutePrefix != tbc.webExternalPrefix {
			level.Warn(logger).Log("msg", "different values for --web.route-prefix and --web.external-prefix detected, web UI may not work without a reverse-proxy.")
		}

		router := route.New()

		// RoutePrefix must always start with '/'.
		tbc.webRoutePrefix = "/" + strings.Trim(tbc.webRoutePrefix, "/")

		// Redirect from / to /webRoutePrefix.
		if tbc.webRoutePrefix != "/" {
			router.Get("/", func(w http.ResponseWriter, r *http.Request) {
				http.Redirect(w, r, tbc.webRoutePrefix+"/", http.StatusFound)
			})
			router.Get(tbc.webRoutePrefix, func(w http.ResponseWriter, r *http.Request) {
				http.Redirect(w, r, tbc.webRoutePrefix+"/", http.StatusFound)
			})
			router = router.WithPrefix(tbc.webRoutePrefix)
		}

		ins := extpromhttp.NewInstrumentationMiddleware(reg, nil)

		bucketUI := ui.NewBucketUI(logger, tbc.webExternalPrefix, tbc.webPrefixHeaderName, component.Bucket)
		bucketUI.Register(router, ins)

		flagsMap := getFlagsMap(cmd.Flags())

		confContentYaml, err := objStoreConfig.Content()
		if err != nil {
			return err
		}

		bkt, err := client.NewBucket(logger, confContentYaml, component.Bucket.String())
		if err != nil {
			return errors.Wrap(err, "bucket client")
		}
		insBkt := objstoretracing.WrapWithTraces(objstore.WrapWithMetrics(bkt, extprom.WrapRegistererWithPrefix("thanos_", reg), bkt.Name()))

		api := v1.NewBlocksAPI(logger, tbc.webDisableCORS, tbc.label, flagsMap, insBkt)

		// Configure Request Logging for HTTP calls.
		opts := []logging.Option{logging.WithDecider(func(_ string, _ error) logging.Decision {
			return logging.NoLogCall
		})}
		logMiddleware := logging.NewHTTPServerMiddleware(logger, opts...)

		api.Register(router.WithPrefix("/api/v1"), tracer, logger, ins, logMiddleware)

		srv.Handle("/", router)

		// Sanity warnings only — none of these abort startup.
		if tbc.interval < 5*time.Minute {
			level.Warn(logger).Log("msg", "Refreshing more often than 5m could lead to large data transfers")
		}

		if tbc.timeout < time.Minute {
			level.Warn(logger).Log("msg", "Timeout less than 1m could lead to frequent failures")
		}

		if tbc.interval < (tbc.timeout * 2) {
			level.Warn(logger).Log("msg", "Refresh interval should be at least 2 times the timeout")
		}

		relabelContentYaml, err := selectorRelabelConf.Content()
		if err != nil {
			return errors.Wrap(err, "get content of relabel configuration")
		}

		relabelConfig, err := block.ParseRelabelConfig(relabelContentYaml, block.SelectorSupportedRelabelActions)
		if err != nil {
			return err
		}
		// TODO(bwplotka): Allow Bucket UI to visualize the state of block as well.
		fetcher, err := block.NewMetaFetcher(logger, block.FetcherConcurrency, insBkt, "", extprom.WrapRegistererWithPrefix(extpromPrefix, reg),
			[]block.MetadataFilter{
				block.NewTimePartitionMetaFilter(filterConf.MinTime, filterConf.MaxTime),
				block.NewLabelShardedMetaFilter(relabelConfig),
				block.NewDeduplicateFilter(block.FetcherConcurrency),
			})
		if err != nil {
			return err
		}
		// Push every (re)fetched meta set into the blocks API for the UI.
		fetcher.UpdateOnChange(func(blocks []metadata.Meta, err error) {
			api.SetGlobal(blocks, err)
		})

		ctx, cancel := context.WithCancel(context.Background())
		// Background refresher: re-fetch block metadata every tbc.interval,
		// retrying each failed iteration once a minute, each attempt capped
		// at tbc.timeout.
		g.Add(func() error {
			statusProber.Ready()
			defer runutil.CloseWithLogOnErr(logger, insBkt, "bucket client")
			return runutil.Repeat(tbc.interval, ctx.Done(), func() error {
				return runutil.RetryWithLog(logger, time.Minute, ctx.Done(), func() error {
					iterCtx, iterCancel := context.WithTimeout(ctx, tbc.timeout)
					defer iterCancel()

					_, _, err := fetcher.Fetch(iterCtx)
					return err
				})
			})
		}, func(error) {
			cancel()
		})

		// HTTP server actor; flips health/readiness probes on shutdown.
		g.Add(func() error {
			statusProber.Healthy()

			return srv.ListenAndServe()
		}, func(err error) {
			statusProber.NotReady(err)
			defer statusProber.NotHealthy(err)

			srv.Shutdown(err)
		})

		return nil
	})
}

// Provide a list of resolution, can not use Enum directly, since string does not implement int64 function.
func listResLevel() []string {
	return []string{
		time.Duration(downsample.ResLevel0).String(),
		time.Duration(downsample.ResLevel1).String(),
		time.Duration(downsample.ResLevel2).String()}
}

// registerBucketReplicate registers the `bucket replicate` subcommand, which
// copies blocks matching the given resolution/compaction/label filters from
// the source object storage into the --objstore-to destination.
func registerBucketReplicate(app extkingpin.AppClause, objStoreConfig *extflag.PathOrContent) {
	cmd := app.Command("replicate", fmt.Sprintf("Replicate data from one object storage to another. NOTE: Currently it works only with Thanos blocks (%v has to have Thanos metadata).", block.MetaFilename))
	httpBindAddr, httpGracePeriod, httpTLSConfig := extkingpin.RegisterHTTPFlags(cmd)
	toObjStoreConfig := extkingpin.RegisterCommonObjStoreFlags(cmd, "-to", false, "The object storage which replicate data to.")

	tbc := &bucketReplicateConfig{}
	tbc.registerBucketReplicateFlag(cmd)

	minTime := model.TimeOrDuration(cmd.Flag("min-time", "Start of time range limit to replicate. Thanos Replicate will replicate only metrics, which happened later than this value. Option can be a constant time in RFC3339 format or time duration relative to current time, such as -1d or 2h45m. Valid duration units are ms, s, m, h, d, w, y.").
		Default("0000-01-01T00:00:00Z"))
	maxTime := model.TimeOrDuration(cmd.Flag("max-time", "End of time range limit to replicate. Thanos Replicate will replicate only metrics, which happened earlier than this value. Option can be a constant time in RFC3339 format or time duration relative to current time, such as -1d or 2h45m. Valid duration units are ms, s, m, h, d, w, y.").
		Default("9999-12-31T23:59:59Z"))
	ids := cmd.Flag("id", "Block to be replicated to the destination bucket. IDs will be used to match blocks and other matchers will be ignored. When specified, this command will be run only once after successful replication. Repeated field").Strings()
	ignoreMarkedForDeletion := cmd.Flag("ignore-marked-for-deletion", "Do not replicate blocks that have deletion mark.").Bool()

	cmd.Setup(func(g *run.Group, logger log.Logger, reg *prometheus.Registry, tracer opentracing.Tracer, _ <-chan struct{}, _ bool) error {
		matchers, err := replicate.ParseFlagMatchers(tbc.matcherStrs)
		if err != nil {
			return errors.Wrap(err, "parse block label matchers")
		}

		// Resolutions are given as durations on the CLI but compared in ms.
		var resolutionLevels []compact.ResolutionLevel
		for _, lvl := range tbc.resolutions {
			resolutionLevels = append(resolutionLevels, compact.ResolutionLevel(lvl.Milliseconds()))
		}

		// Without explicit --compaction values, expand [compactMin, compactMax]
		// into the concrete list of levels.
		if len(tbc.compactions) == 0 {
			if tbc.compactMin > tbc.compactMax {
				return errors.New("compaction-min must be less than or equal to compaction-max")
			}
			tbc.compactions = []int{}
			for compactionLevel := tbc.compactMin; compactionLevel <= tbc.compactMax; compactionLevel++ {
				tbc.compactions = append(tbc.compactions, compactionLevel)
			}
		}

		blockIDs := make([]ulid.ULID, 0, len(*ids))
		for _, id := range *ids {
			bid, err := ulid.Parse(id)
			if err != nil {
				return errors.Wrap(err, "invalid ULID found in --id flag")
			}
			blockIDs = append(blockIDs, bid)
		}

		return replicate.RunReplicate(
			g,
			logger,
			reg,
			tracer,
			*httpBindAddr,
			*httpTLSConfig,
			time.Duration(*httpGracePeriod),
			matchers,
			resolutionLevels,
			tbc.compactions,
			objStoreConfig,
			toObjStoreConfig,
			tbc.singleRun,
			minTime,
			maxTime,
			blockIDs,
			*ignoreMarkedForDeletion,
		)
	})
}

// registerBucketDownsample registers the `bucket downsample` subcommand.
func registerBucketDownsample(app extkingpin.AppClause, objStoreConfig *extflag.PathOrContent) {
cmd := app.Command(component.Downsample.String(), "Continuously downsamples blocks in an object store bucket.") 773 httpAddr, httpGracePeriod, httpTLSConfig := extkingpin.RegisterHTTPFlags(cmd) 774 775 tbc := &bucketDownsampleConfig{} 776 tbc.registerBucketDownsampleFlag(cmd) 777 778 cmd.Setup(func(g *run.Group, logger log.Logger, reg *prometheus.Registry, tracer opentracing.Tracer, _ <-chan struct{}, _ bool) error { 779 return RunDownsample(g, logger, reg, *httpAddr, *httpTLSConfig, time.Duration(*httpGracePeriod), tbc.dataDir, 780 tbc.waitInterval, tbc.downsampleConcurrency, tbc.blockFilesConcurrency, objStoreConfig, component.Downsample, metadata.HashFunc(tbc.hashFunc)) 781 }) 782 } 783 784 func registerBucketCleanup(app extkingpin.AppClause, objStoreConfig *extflag.PathOrContent) { 785 cmd := app.Command(component.Cleanup.String(), "Cleans up all blocks marked for deletion.") 786 787 tbc := &bucketCleanupConfig{} 788 tbc.registerBucketCleanupFlag(cmd) 789 790 selectorRelabelConf := extkingpin.RegisterSelectorRelabelFlags(cmd) 791 cmd.Setup(func(g *run.Group, logger log.Logger, reg *prometheus.Registry, _ opentracing.Tracer, _ <-chan struct{}, _ bool) error { 792 confContentYaml, err := objStoreConfig.Content() 793 if err != nil { 794 return err 795 } 796 797 relabelContentYaml, err := selectorRelabelConf.Content() 798 if err != nil { 799 return errors.Wrap(err, "get content of relabel configuration") 800 } 801 802 relabelConfig, err := block.ParseRelabelConfig(relabelContentYaml, block.SelectorSupportedRelabelActions) 803 if err != nil { 804 return err 805 } 806 807 bkt, err := client.NewBucket(logger, confContentYaml, component.Cleanup.String()) 808 if err != nil { 809 return err 810 } 811 insBkt := objstoretracing.WrapWithTraces(objstore.WrapWithMetrics(bkt, extprom.WrapRegistererWithPrefix("thanos_", reg), bkt.Name())) 812 813 // Dummy actor to immediately kill the group after the run function returns. 
814 g.Add(func() error { return nil }, func(error) {}) 815 816 stubCounter := promauto.With(nil).NewCounter(prometheus.CounterOpts{}) 817 818 // While fetching blocks, we filter out blocks that were marked for deletion by using IgnoreDeletionMarkFilter. 819 // The delay of deleteDelay/2 is added to ensure we fetch blocks that are meant to be deleted but do not have a replacement yet. 820 // This is to make sure compactor will not accidentally perform compactions with gap instead. 821 ignoreDeletionMarkFilter := block.NewIgnoreDeletionMarkFilter(logger, insBkt, tbc.deleteDelay/2, tbc.blockSyncConcurrency) 822 duplicateBlocksFilter := block.NewDeduplicateFilter(tbc.blockSyncConcurrency) 823 blocksCleaner := compact.NewBlocksCleaner(logger, insBkt, ignoreDeletionMarkFilter, tbc.deleteDelay, stubCounter, stubCounter) 824 825 ctx := context.Background() 826 827 var sy *compact.Syncer 828 { 829 baseMetaFetcher, err := block.NewBaseFetcher(logger, tbc.blockSyncConcurrency, insBkt, "", extprom.WrapRegistererWithPrefix(extpromPrefix, reg)) 830 if err != nil { 831 return errors.Wrap(err, "create meta fetcher") 832 } 833 cf := baseMetaFetcher.NewMetaFetcher( 834 extprom.WrapRegistererWithPrefix(extpromPrefix, reg), []block.MetadataFilter{ 835 block.NewLabelShardedMetaFilter(relabelConfig), 836 block.NewConsistencyDelayMetaFilter(logger, tbc.consistencyDelay, extprom.WrapRegistererWithPrefix(extpromPrefix, reg)), 837 ignoreDeletionMarkFilter, 838 duplicateBlocksFilter, 839 }, 840 ) 841 sy, err = compact.NewMetaSyncer( 842 logger, 843 reg, 844 insBkt, 845 cf, 846 duplicateBlocksFilter, 847 ignoreDeletionMarkFilter, 848 stubCounter, 849 stubCounter, 850 ) 851 if err != nil { 852 return errors.Wrap(err, "create syncer") 853 } 854 } 855 856 level.Info(logger).Log("msg", "syncing blocks metadata") 857 if err := sy.SyncMetas(ctx); err != nil { 858 return errors.Wrap(err, "sync blocks") 859 } 860 861 level.Info(logger).Log("msg", "synced blocks done") 862 863 
compact.BestEffortCleanAbortedPartialUploads(ctx, logger, sy.Partial(), insBkt, stubCounter, stubCounter, stubCounter) 864 if err := blocksCleaner.DeleteMarkedBlocks(ctx); err != nil { 865 return errors.Wrap(err, "error cleaning blocks") 866 } 867 868 level.Info(logger).Log("msg", "cleanup done") 869 return nil 870 }) 871 } 872 873 type tablePrinter func(w io.Writer, t Table) error 874 875 func printTable(w io.Writer, t Table) error { 876 table := tablewriter.NewWriter(w) 877 table.SetHeader(t.Header) 878 table.SetBorders(tablewriter.Border{Left: true, Top: false, Right: true, Bottom: false}) 879 table.SetCenterSeparator("|") 880 table.SetAutoWrapText(false) 881 table.SetReflowDuringAutoWrap(false) 882 table.SetAlignment(tablewriter.ALIGN_LEFT) 883 table.AppendBulk(t.Lines) 884 table.Render() 885 return nil 886 } 887 888 func printCSV(w io.Writer, t Table) error { 889 csv := csv.NewWriter(w) 890 err := csv.Write(t.Header) 891 if err != nil { 892 return err 893 } 894 err = csv.WriteAll(t.Lines) 895 if err != nil { 896 return err 897 } 898 csv.Flush() 899 return nil 900 } 901 902 func newTSVWriter(w io.Writer) *csv.Writer { 903 writer := csv.NewWriter(w) 904 writer.Comma = rune('\t') 905 return writer 906 } 907 908 func printTSV(w io.Writer, t Table) error { 909 tsv := newTSVWriter(w) 910 err := tsv.Write(t.Header) 911 if err != nil { 912 return err 913 } 914 err = tsv.WriteAll(t.Lines) 915 if err != nil { 916 return err 917 } 918 tsv.Flush() 919 return nil 920 } 921 922 func printBlockData(blockMetas []*metadata.Meta, selectorLabels labels.Labels, sortBy []string, printer tablePrinter) error { 923 header := inspectColumns 924 925 var lines [][]string 926 p := message.NewPrinter(language.English) 927 928 for _, blockMeta := range blockMetas { 929 if !matchesSelector(blockMeta, selectorLabels) { 930 continue 931 } 932 933 timeRange := time.Duration((blockMeta.MaxTime - blockMeta.MinTime) * int64(time.Millisecond)) 934 935 untilDown := "-" 936 if until, err := 
compact.UntilNextDownsampling(blockMeta); err == nil { 937 untilDown = until.String() 938 } 939 var labels []string 940 for _, key := range getKeysAlphabetically(blockMeta.Thanos.Labels) { 941 labels = append(labels, fmt.Sprintf("%s=%s", key, blockMeta.Thanos.Labels[key])) 942 } 943 944 var line []string 945 line = append(line, 946 blockMeta.ULID.String(), 947 time.Unix(blockMeta.MinTime/1000, 0).Format(time.RFC3339), 948 time.Unix(blockMeta.MaxTime/1000, 0).Format(time.RFC3339), 949 timeRange.String(), 950 untilDown, 951 p.Sprintf("%d", blockMeta.Stats.NumSeries), 952 p.Sprintf("%d", blockMeta.Stats.NumSamples), 953 p.Sprintf("%d", blockMeta.Stats.NumChunks), 954 p.Sprintf("%d", blockMeta.Compaction.Level), 955 p.Sprintf("%t", blockMeta.Compaction.Failed), 956 strings.Join(labels, ","), 957 time.Duration(blockMeta.Thanos.Downsample.Resolution*int64(time.Millisecond)).String(), 958 string(blockMeta.Thanos.Source)) 959 960 lines = append(lines, line) 961 } 962 963 var sortByColNum []int 964 for _, col := range sortBy { 965 index := getIndex(header, col) 966 if index == -1 { 967 return errors.Errorf("column %s not found", col) 968 } 969 sortByColNum = append(sortByColNum, index) 970 } 971 972 t := Table{Header: header, Lines: lines, SortIndices: sortByColNum} 973 sort.Sort(t) 974 err := printer(os.Stdout, t) 975 if err != nil { 976 return errors.Errorf("unable to write output.") 977 } 978 return nil 979 } 980 981 func getKeysAlphabetically(labels map[string]string) []string { 982 var keys []string 983 for k := range labels { 984 keys = append(keys, k) 985 } 986 sort.Strings(keys) 987 return keys 988 } 989 990 // matchesSelector checks if blockMeta contains every label from 991 // the selector with the correct value. 
992 func matchesSelector(blockMeta *metadata.Meta, selectorLabels labels.Labels) bool { 993 for _, l := range selectorLabels { 994 if v, ok := blockMeta.Thanos.Labels[l.Name]; !ok || v != l.Value { 995 return false 996 } 997 } 998 return true 999 } 1000 1001 // getIndex calculates the index of s in strs. 1002 func getIndex(strs []string, s string) int { 1003 for i, col := range strs { 1004 if col == s { 1005 return i 1006 } 1007 } 1008 return -1 1009 } 1010 1011 type Table struct { 1012 Header []string 1013 Lines [][]string 1014 SortIndices []int 1015 } 1016 1017 func (t Table) Len() int { return len(t.Lines) } 1018 1019 func (t Table) Swap(i, j int) { t.Lines[i], t.Lines[j] = t.Lines[j], t.Lines[i] } 1020 1021 func (t Table) Less(i, j int) bool { 1022 for _, index := range t.SortIndices { 1023 if t.Lines[i][index] == t.Lines[j][index] { 1024 continue 1025 } 1026 return compare(t.Lines[i][index], t.Lines[j][index]) 1027 } 1028 return compare(t.Lines[i][0], t.Lines[j][0]) 1029 } 1030 1031 func compare(s1, s2 string) bool { 1032 // Values can be either Time, Duration, comma-delimited integers or strings. 1033 s1Time, s1Err := time.Parse(time.RFC3339, s1) 1034 s2Time, s2Err := time.Parse(time.RFC3339, s2) 1035 if s1Err != nil || s2Err != nil { 1036 s1Duration, s1Err := time.ParseDuration(s1) 1037 s2Duration, s2Err := time.ParseDuration(s2) 1038 if s1Err != nil || s2Err != nil { 1039 s1Int, s1Err := strconv.ParseUint(strings.ReplaceAll(s1, ",", ""), 10, 64) 1040 s2Int, s2Err := strconv.ParseUint(strings.ReplaceAll(s2, ",", ""), 10, 64) 1041 if s1Err != nil || s2Err != nil { 1042 return s1 < s2 1043 } 1044 return s1Int < s2Int 1045 } 1046 return s1Duration < s2Duration 1047 } 1048 return s1Time.Before(s2Time) 1049 } 1050 1051 func registerBucketMarkBlock(app extkingpin.AppClause, objStoreConfig *extflag.PathOrContent) { 1052 cmd := app.Command(component.Mark.String(), "Mark block for deletion or no-compact in a safe way. 
NOTE: If the compactor is currently running compacting same block, this operation would be potentially a noop.") 1053 1054 tbc := &bucketMarkBlockConfig{} 1055 tbc.registerBucketMarkBlockFlag(cmd) 1056 1057 cmd.Setup(func(g *run.Group, logger log.Logger, reg *prometheus.Registry, _ opentracing.Tracer, _ <-chan struct{}, _ bool) error { 1058 confContentYaml, err := objStoreConfig.Content() 1059 if err != nil { 1060 return err 1061 } 1062 1063 bkt, err := client.NewBucket(logger, confContentYaml, component.Mark.String()) 1064 if err != nil { 1065 return err 1066 } 1067 insBkt := objstoretracing.WrapWithTraces(objstore.WrapWithMetrics(bkt, extprom.WrapRegistererWithPrefix("thanos_", reg), bkt.Name())) 1068 1069 var ids []ulid.ULID 1070 for _, id := range tbc.blockIDs { 1071 u, err := ulid.Parse(id) 1072 if err != nil { 1073 return errors.Errorf("block.id is not a valid UUID, got: %v", id) 1074 } 1075 ids = append(ids, u) 1076 } 1077 1078 if !tbc.removeMarker && tbc.details == "" { 1079 return errors.Errorf("required flag --details not provided") 1080 } 1081 1082 ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) 1083 g.Add(func() error { 1084 for _, id := range ids { 1085 if tbc.removeMarker { 1086 err := block.RemoveMark(ctx, logger, insBkt, id, promauto.With(nil).NewCounter(prometheus.CounterOpts{}), tbc.marker) 1087 if err != nil { 1088 return errors.Wrapf(err, "remove mark %v for %v", id, tbc.marker) 1089 } 1090 continue 1091 } 1092 switch tbc.marker { 1093 case metadata.DeletionMarkFilename: 1094 if err := block.MarkForDeletion(ctx, logger, insBkt, id, tbc.details, promauto.With(nil).NewCounter(prometheus.CounterOpts{})); err != nil { 1095 return errors.Wrapf(err, "mark %v for %v", id, tbc.marker) 1096 } 1097 case metadata.NoCompactMarkFilename: 1098 if err := block.MarkForNoCompact(ctx, logger, insBkt, id, metadata.ManualNoCompactReason, tbc.details, promauto.With(nil).NewCounter(prometheus.CounterOpts{})); err != nil { 1099 return 
errors.Wrapf(err, "mark %v for %v", id, tbc.marker) 1100 } 1101 case metadata.NoDownsampleMarkFilename: 1102 if err := block.MarkForNoDownsample(ctx, logger, insBkt, id, metadata.ManualNoDownsampleReason, tbc.details, promauto.With(nil).NewCounter(prometheus.CounterOpts{})); err != nil { 1103 return errors.Wrapf(err, "mark %v for %v", id, tbc.marker) 1104 } 1105 default: 1106 return errors.Errorf("not supported marker %v", tbc.marker) 1107 } 1108 } 1109 level.Info(logger).Log("msg", "marking done", "marker", tbc.marker, "IDs", strings.Join(tbc.blockIDs, ",")) 1110 return nil 1111 }, func(err error) { 1112 cancel() 1113 }) 1114 return nil 1115 }) 1116 } 1117 1118 func registerBucketRewrite(app extkingpin.AppClause, objStoreConfig *extflag.PathOrContent) { 1119 cmd := app.Command(component.Rewrite.String(), "Rewrite chosen blocks in the bucket, while deleting or modifying series "+ 1120 "Resulted block has modified stats in meta.json. Additionally compaction.sources are altered to not confuse readers of meta.json. "+ 1121 "Instead thanos.rewrite section is added with useful info like old sources and deletion requests. "+ 1122 "NOTE: It's recommended to turn off compactor while doing this operation. If the compactor is running and touching exactly same block that "+ 1123 "is being rewritten, the resulted rewritten block might only cause overlap (mitigated by marking overlapping block manually for deletion) "+ 1124 "and the data you wanted to rewrite could already part of bigger block.\n\n"+ 1125 "Use FILESYSTEM type of bucket to rewrite block on disk (suitable for vanilla Prometheus) "+ 1126 "After rewrite, it's caller responsibility to delete or mark source block for deletion to avoid overlaps. 
"+ 1127 "WARNING: This procedure is *IRREVERSIBLE* after certain time (delete delay), so do backup your blocks first.") 1128 1129 tbc := &bucketRewriteConfig{} 1130 tbc.registerBucketRewriteFlag(cmd) 1131 1132 hashFunc := cmd.Flag("hash-func", "Specify which hash function to use when calculating the hashes of produced files. If no function has been specified, it does not happen. This permits avoiding downloading some files twice albeit at some performance cost. Possible values are: \"\", \"SHA256\"."). 1133 Default("").Enum("SHA256", "") 1134 toDelete := extflag.RegisterPathOrContent(cmd, "rewrite.to-delete-config", "YAML file that contains []metadata.DeletionRequest that will be applied to blocks", extflag.WithEnvSubstitution()) 1135 toRelabel := extflag.RegisterPathOrContent(cmd, "rewrite.to-relabel-config", "YAML file that contains relabel configs that will be applied to blocks", extflag.WithEnvSubstitution()) 1136 provideChangeLog := cmd.Flag("rewrite.add-change-log", "If specified, all modifications are written to new block directory. 
Disable if latency is to high.").Default("true").Bool() 1137 cmd.Setup(func(g *run.Group, logger log.Logger, reg *prometheus.Registry, _ opentracing.Tracer, _ <-chan struct{}, _ bool) error { 1138 confContentYaml, err := objStoreConfig.Content() 1139 if err != nil { 1140 return err 1141 } 1142 1143 bkt, err := client.NewBucket(logger, confContentYaml, component.Rewrite.String()) 1144 if err != nil { 1145 return err 1146 } 1147 insBkt := objstoretracing.WrapWithTraces(objstore.WrapWithMetrics(bkt, extprom.WrapRegistererWithPrefix("thanos_", reg), bkt.Name())) 1148 1149 var modifiers []compactv2.Modifier 1150 1151 relabelYaml, err := toRelabel.Content() 1152 if err != nil { 1153 return err 1154 } 1155 var relabels []*relabel.Config 1156 if len(relabelYaml) > 0 { 1157 relabels, err = block.ParseRelabelConfig(relabelYaml, nil) 1158 if err != nil { 1159 return err 1160 } 1161 modifiers = append(modifiers, compactv2.WithRelabelModifier(relabels...)) 1162 } 1163 1164 deletionsYaml, err := toDelete.Content() 1165 if err != nil { 1166 return err 1167 } 1168 var deletions []metadata.DeletionRequest 1169 if len(deletionsYaml) > 0 { 1170 if err := yaml.Unmarshal(deletionsYaml, &deletions); err != nil { 1171 return err 1172 } 1173 modifiers = append(modifiers, compactv2.WithDeletionModifier(deletions...)) 1174 } 1175 1176 if len(modifiers) == 0 { 1177 return errors.New("rewrite configuration should be provided") 1178 } 1179 1180 var ids []ulid.ULID 1181 for _, id := range tbc.blockIDs { 1182 u, err := ulid.Parse(id) 1183 if err != nil { 1184 return errors.Errorf("id is not a valid block ULID, got: %v", id) 1185 } 1186 ids = append(ids, u) 1187 } 1188 1189 if err := os.RemoveAll(tbc.tmpDir); err != nil { 1190 return err 1191 } 1192 if err := os.MkdirAll(tbc.tmpDir, os.ModePerm); err != nil { 1193 return err 1194 } 1195 1196 ctx, cancel := context.WithCancel(context.Background()) 1197 g.Add(func() error { 1198 chunkPool := chunkenc.NewPool() 1199 changeLog := 
compactv2.NewChangeLog(io.Discard) 1200 stubCounter := promauto.With(nil).NewCounter(prometheus.CounterOpts{}) 1201 for _, id := range ids { 1202 // Delete series from block & modify. 1203 level.Info(logger).Log("msg", "downloading block", "source", id) 1204 if err := block.Download(ctx, logger, insBkt, id, filepath.Join(tbc.tmpDir, id.String())); err != nil { 1205 return errors.Wrapf(err, "download %v", id) 1206 } 1207 1208 meta, err := metadata.ReadFromDir(filepath.Join(tbc.tmpDir, id.String())) 1209 if err != nil { 1210 return errors.Wrapf(err, "read meta of %v", id) 1211 } 1212 b, err := tsdb.OpenBlock(logger, filepath.Join(tbc.tmpDir, id.String()), chunkPool) 1213 if err != nil { 1214 return errors.Wrapf(err, "open block %v", id) 1215 } 1216 1217 p := compactv2.NewProgressLogger(logger, int(b.Meta().Stats.NumSeries)) 1218 newID := ulid.MustNew(ulid.Now(), rand.Reader) 1219 meta.ULID = newID 1220 meta.Thanos.Rewrites = append(meta.Thanos.Rewrites, metadata.Rewrite{ 1221 Sources: meta.Compaction.Sources, 1222 DeletionsApplied: deletions, 1223 RelabelsApplied: relabels, 1224 }) 1225 meta.Compaction.Sources = []ulid.ULID{newID} 1226 meta.Thanos.Source = metadata.BucketRewriteSource 1227 1228 if err := os.MkdirAll(filepath.Join(tbc.tmpDir, newID.String()), os.ModePerm); err != nil { 1229 return err 1230 } 1231 1232 if *provideChangeLog { 1233 f, err := os.OpenFile(filepath.Join(tbc.tmpDir, newID.String(), "change.log"), os.O_CREATE|os.O_WRONLY, os.ModePerm) 1234 if err != nil { 1235 return err 1236 } 1237 defer runutil.CloseWithLogOnErr(logger, f, "close changelog") 1238 1239 changeLog = compactv2.NewChangeLog(f) 1240 level.Info(logger).Log("msg", "changelog will be available", "file", filepath.Join(tbc.tmpDir, newID.String(), "change.log")) 1241 } 1242 1243 d, err := block.NewDiskWriter(ctx, logger, filepath.Join(tbc.tmpDir, newID.String())) 1244 if err != nil { 1245 return err 1246 } 1247 1248 var comp *compactv2.Compactor 1249 if tbc.dryRun { 1250 comp = 
compactv2.NewDryRun(tbc.tmpDir, logger, changeLog, chunkPool) 1251 } else { 1252 comp = compactv2.New(tbc.tmpDir, logger, changeLog, chunkPool) 1253 } 1254 1255 level.Info(logger).Log("msg", "starting rewrite for block", "source", id, "new", newID, "toDelete", string(deletionsYaml), "toRelabel", string(relabelYaml)) 1256 if err := comp.WriteSeries(ctx, []block.Reader{b}, d, p, modifiers...); err != nil { 1257 return errors.Wrapf(err, "writing series from %v to %v", id, newID) 1258 } 1259 1260 if tbc.dryRun { 1261 level.Info(logger).Log("msg", "dry run finished. Changes should be printed to stderr", "Block ID", id) 1262 continue 1263 } 1264 1265 level.Info(logger).Log("msg", "wrote new block after modifications; flushing", "source", id, "new", newID) 1266 meta.Stats, err = d.Flush() 1267 if err != nil { 1268 return errors.Wrap(err, "flush") 1269 } 1270 if err := meta.WriteToDir(logger, filepath.Join(tbc.tmpDir, newID.String())); err != nil { 1271 return err 1272 } 1273 1274 level.Info(logger).Log("msg", "uploading new block", "source", id, "new", newID) 1275 if tbc.promBlocks { 1276 if err := block.UploadPromBlock(ctx, logger, insBkt, filepath.Join(tbc.tmpDir, newID.String()), metadata.HashFunc(*hashFunc)); err != nil { 1277 return errors.Wrap(err, "upload") 1278 } 1279 } else { 1280 if err := block.Upload(ctx, logger, insBkt, filepath.Join(tbc.tmpDir, newID.String()), metadata.HashFunc(*hashFunc)); err != nil { 1281 return errors.Wrap(err, "upload") 1282 } 1283 } 1284 level.Info(logger).Log("msg", "uploaded", "source", id, "new", newID) 1285 1286 if !tbc.dryRun && tbc.deleteBlocks { 1287 if err := block.MarkForDeletion(ctx, logger, insBkt, id, "block rewritten", stubCounter); err != nil { 1288 level.Error(logger).Log("msg", "failed to mark block for deletion", "id", id.String(), "err", err) 1289 } 1290 } 1291 } 1292 level.Info(logger).Log("msg", "rewrite done", "IDs", strings.Join(tbc.blockIDs, ",")) 1293 return nil 1294 }, func(err error) { 1295 cancel() 1296 }) 
		return nil
	})
}

// registerBucketRetention registers the `bucket retention` sub-command, which
// applies per-resolution retention policies to blocks in the given bucket.
func registerBucketRetention(app extkingpin.AppClause, objStoreConfig *extflag.PathOrContent) {
	var (
		retentionRaw, retentionFiveMin, retentionOneHr prommodel.Duration
	)

	cmd := app.Command("retention", "Retention applies retention policies on the given bucket. Please make sure no compactor is running on the same bucket at the same time.")

	tbc := &bucketRetentionConfig{}
	tbc.registerBucketRetentionFlag(cmd)

	selectorRelabelConf := extkingpin.RegisterSelectorRelabelFlags(cmd)
	cmd.Flag("retention.resolution-raw",
		"How long to retain raw samples in bucket. Setting this to 0d will retain samples of this resolution forever").
		Default("0d").SetValue(&retentionRaw)
	cmd.Flag("retention.resolution-5m", "How long to retain samples of resolution 1 (5 minutes) in bucket. Setting this to 0d will retain samples of this resolution forever").
		Default("0d").SetValue(&retentionFiveMin)
	cmd.Flag("retention.resolution-1h", "How long to retain samples of resolution 2 (1 hour) in bucket. Setting this to 0d will retain samples of this resolution forever").
		Default("0d").SetValue(&retentionOneHr)
	cmd.Setup(func(g *run.Group, logger log.Logger, reg *prometheus.Registry, _ opentracing.Tracer, _ <-chan struct{}, _ bool) error {
		// Per the flag help, a duration of 0 means "retain forever" for that resolution.
		retentionByResolution := map[compact.ResolutionLevel]time.Duration{
			compact.ResolutionLevelRaw: time.Duration(retentionRaw),
			compact.ResolutionLevel5m:  time.Duration(retentionFiveMin),
			compact.ResolutionLevel1h:  time.Duration(retentionOneHr),
		}

		if retentionByResolution[compact.ResolutionLevelRaw].Seconds() != 0 {
			level.Info(logger).Log("msg", "retention policy of raw samples is enabled", "duration", retentionByResolution[compact.ResolutionLevelRaw])
		}
		if retentionByResolution[compact.ResolutionLevel5m].Seconds() != 0 {
			level.Info(logger).Log("msg", "retention policy of 5 min aggregated samples is enabled", "duration", retentionByResolution[compact.ResolutionLevel5m])
		}
		if retentionByResolution[compact.ResolutionLevel1h].Seconds() != 0 {
			level.Info(logger).Log("msg", "retention policy of 1 hour aggregated samples is enabled", "duration", retentionByResolution[compact.ResolutionLevel1h])
		}

		confContentYaml, err := objStoreConfig.Content()
		if err != nil {
			return err
		}

		relabelContentYaml, err := selectorRelabelConf.Content()
		if err != nil {
			return errors.Wrap(err, "get content of relabel configuration")
		}

		relabelConfig, err := block.ParseRelabelConfig(relabelContentYaml, block.SelectorSupportedRelabelActions)
		if err != nil {
			return err
		}

		bkt, err := client.NewBucket(logger, confContentYaml, component.Retention.String())
		if err != nil {
			return err
		}
		insBkt := objstoretracing.WrapWithTraces(objstore.WrapWithMetrics(bkt, extprom.WrapRegistererWithPrefix("thanos_", reg), bkt.Name()))

		// Dummy actor to immediately kill the group after the run function returns.
		g.Add(func() error { return nil }, func(error) {})

		defer runutil.CloseWithLogOnErr(logger, insBkt, "bucket client")

		// While fetching blocks, we filter out blocks that were marked for deletion by using IgnoreDeletionMarkFilter.
		// The delay of deleteDelay/2 is added to ensure we fetch blocks that are meant to be deleted but do not have a replacement yet.
		// This is to make sure compactor will not accidentally perform compactions with gap instead.
		ignoreDeletionMarkFilter := block.NewIgnoreDeletionMarkFilter(logger, insBkt, tbc.deleteDelay/2, tbc.blockSyncConcurrency)
		duplicateBlocksFilter := block.NewDeduplicateFilter(tbc.blockSyncConcurrency)
		// Stub metric used wherever the called APIs require a counter we do not report.
		stubCounter := promauto.With(nil).NewCounter(prometheus.CounterOpts{})

		var sy *compact.Syncer
		{
			baseMetaFetcher, err := block.NewBaseFetcher(logger, tbc.blockSyncConcurrency, insBkt, "", extprom.WrapRegistererWithPrefix(extpromPrefix, reg))
			if err != nil {
				return errors.Wrap(err, "create meta fetcher")
			}
			cf := baseMetaFetcher.NewMetaFetcher(
				extprom.WrapRegistererWithPrefix(extpromPrefix, reg), []block.MetadataFilter{
					block.NewLabelShardedMetaFilter(relabelConfig),
					block.NewConsistencyDelayMetaFilter(logger, tbc.consistencyDelay, extprom.WrapRegistererWithPrefix(extpromPrefix, reg)),
					duplicateBlocksFilter,
					ignoreDeletionMarkFilter,
				},
			)
			sy, err = compact.NewMetaSyncer(
				logger,
				reg,
				insBkt,
				cf,
				duplicateBlocksFilter,
				ignoreDeletionMarkFilter,
				stubCounter,
				stubCounter,
			)
			if err != nil {
				return errors.Wrap(err, "create syncer")
			}
		}

		ctx := context.Background()
		level.Info(logger).Log("msg", "syncing blocks metadata")
		if err := sy.SyncMetas(ctx); err != nil {
			return errors.Wrap(err, "sync blocks")
		}

		level.Info(logger).Log("msg", "synced blocks done")

		level.Warn(logger).Log("msg", "GLOBAL COMPACTOR SHOULD __NOT__ BE RUNNING ON THE SAME BUCKET")

		if err := compact.ApplyRetentionPolicyByResolution(ctx, logger, insBkt, sy.Metas(), retentionByResolution, stubCounter); err != nil {
			return errors.Wrap(err, "retention failed")
		}
		return nil
	})
}