github.com/thanos-io/thanos@v0.32.5/pkg/replicate/replicator.go (about) 1 // Copyright (c) The Thanos Authors. 2 // Licensed under the Apache License 2.0. 3 4 package replicate 5 6 import ( 7 "context" 8 "math/rand" 9 "time" 10 11 extflag "github.com/efficientgo/tools/extkingpin" 12 "github.com/go-kit/log" 13 "github.com/go-kit/log/level" 14 "github.com/oklog/run" 15 "github.com/oklog/ulid" 16 "github.com/opentracing/opentracing-go" 17 "github.com/pkg/errors" 18 amlabels "github.com/prometheus/alertmanager/pkg/labels" 19 "github.com/prometheus/client_golang/prometheus" 20 "github.com/prometheus/client_golang/prometheus/promauto" 21 "github.com/prometheus/common/model" 22 "github.com/prometheus/prometheus/model/labels" 23 24 "github.com/thanos-io/objstore" 25 "github.com/thanos-io/objstore/client" 26 objstoretracing "github.com/thanos-io/objstore/tracing/opentracing" 27 28 thanosblock "github.com/thanos-io/thanos/pkg/block" 29 "github.com/thanos-io/thanos/pkg/compact" 30 "github.com/thanos-io/thanos/pkg/component" 31 "github.com/thanos-io/thanos/pkg/extprom" 32 thanosmodel "github.com/thanos-io/thanos/pkg/model" 33 "github.com/thanos-io/thanos/pkg/prober" 34 "github.com/thanos-io/thanos/pkg/runutil" 35 "github.com/thanos-io/thanos/pkg/server/http" 36 ) 37 38 const ( 39 // Labels for metrics. 40 labelSuccess = "success" 41 labelError = "error" 42 ) 43 44 // ParseFlagMatchers parse flag into matchers. 45 func ParseFlagMatchers(s string) ([]*labels.Matcher, error) { 46 amMatchers, err := amlabels.ParseMatchers(s) 47 if err != nil { 48 return nil, err 49 } 50 matchers := make([]*labels.Matcher, 0, len(amMatchers)) 51 for _, a := range amMatchers { 52 if !model.LabelName.IsValid(model.LabelName(a.Name)) { 53 return nil, errors.Errorf("unsupported format for label %s", a.Name) 54 } 55 matchers = append(matchers, labels.MustNewMatcher(labels.MatchType(a.Type), a.Name, a.Value)) 56 } 57 58 return matchers, nil 59 } 60 61 // RunReplicate replicate data based on config. 62 func RunReplicate( 63 g *run.Group, 64 logger log.Logger, 65 reg *prometheus.Registry, 66 _ opentracing.Tracer, 67 httpBindAddr string, 68 httpTLSConfig string, 69 httpGracePeriod time.Duration, 70 labelSelector labels.Selector, 71 resolutions []compact.ResolutionLevel, 72 compactions []int, 73 fromObjStoreConfig *extflag.PathOrContent, 74 toObjStoreConfig *extflag.PathOrContent, 75 singleRun bool, 76 minTime, maxTime *thanosmodel.TimeOrDurationValue, 77 blockIDs []ulid.ULID, 78 ignoreMarkedForDeletion bool, 79 ) error { 80 logger = log.With(logger, "component", "replicate") 81 82 level.Debug(logger).Log("msg", "setting up http listen-group") 83 84 httpProbe := prober.NewHTTP() 85 statusProber := prober.Combine( 86 httpProbe, 87 prober.NewInstrumentation(component.Replicate, logger, extprom.WrapRegistererWithPrefix("thanos_", reg)), 88 ) 89 90 s := http.New(logger, reg, component.Replicate, httpProbe, 91 http.WithListen(httpBindAddr), 92 http.WithGracePeriod(httpGracePeriod), 93 http.WithTLSConfig(httpTLSConfig), 94 ) 95 96 g.Add(func() error { 97 level.Info(logger).Log("msg", "Listening for http service", "address", httpBindAddr) 98 99 statusProber.Healthy() 100 101 return s.ListenAndServe() 102 }, func(err error) { 103 statusProber.NotReady(err) 104 defer statusProber.NotHealthy(err) 105 106 s.Shutdown(err) 107 }) 108 109 fromConfContentYaml, err := fromObjStoreConfig.Content() 110 if err != nil { 111 return err 112 } 113 114 if len(fromConfContentYaml) == 0 { 115 return errors.New("No supported bucket was configured to replicate from") 116 } 117 118 bkt, err := client.NewBucket(logger, fromConfContentYaml, component.Replicate.String()) 119 if err != nil { 120 return err 121 } 122 fromBkt := objstoretracing.WrapWithTraces( 123 objstore.WrapWithMetrics( 124 bkt, 125 prometheus.WrapRegistererWithPrefix("thanos_", prometheus.WrapRegistererWith(prometheus.Labels{"replicate": "from"}, reg)), 126 bkt.Name(), 127 ), 128 ) 129 130 toConfContentYaml, err := toObjStoreConfig.Content() 131 if err != nil { 132 return err 133 } 134 135 if len(toConfContentYaml) == 0 { 136 return errors.New("No supported bucket was configured to replicate to") 137 } 138 139 toBkt, err := client.NewBucket(logger, toConfContentYaml, component.Replicate.String()) 140 if err != nil { 141 return err 142 } 143 toBkt = objstoretracing.WrapWithTraces( 144 objstore.WrapWithMetrics( 145 toBkt, 146 prometheus.WrapRegistererWithPrefix("thanos_", prometheus.WrapRegistererWith(prometheus.Labels{"replicate": "to"}, reg)), 147 toBkt.Name(), 148 ), 149 ) 150 151 replicationRunCounter := promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ 152 Name: "thanos_replicate_replication_runs_total", 153 Help: "The number of replication runs split by success and error.", 154 }, []string{"result"}) 155 replicationRunCounter.WithLabelValues(labelSuccess) 156 replicationRunCounter.WithLabelValues(labelError) 157 158 replicationRunDuration := promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{ 159 Name: "thanos_replicate_replication_run_duration_seconds", 160 Help: "The Duration of replication runs split by success and error.", 161 }, []string{"result"}) 162 replicationRunDuration.WithLabelValues(labelSuccess) 163 replicationRunDuration.WithLabelValues(labelError) 164 fetcher, err := newMetaFetcher(logger, fromBkt, reg, *minTime, *maxTime, 32, ignoreMarkedForDeletion) 165 if err != nil { 166 return errors.Wrapf(err, "create meta fetcher with bucket %v", fromBkt) 167 } 168 169 blockFilter := NewBlockFilter( 170 logger, 171 labelSelector, 172 resolutions, 173 compactions, 174 blockIDs, 175 ).Filter 176 metrics := newReplicationMetrics(reg) 177 ctx, cancel := context.WithCancel(context.Background()) 178 179 replicateFn := func() error { 180 timestamp := time.Now() 181 entropy := ulid.Monotonic(rand.New(rand.NewSource(timestamp.UnixNano())), 0) 182 183 runID, err := ulid.New(ulid.Timestamp(timestamp), entropy) 184 if err != nil { 185 return errors.Wrap(err, "generate replication run-id") 186 } 187 188 logger := log.With(logger, "replication-run-id", runID.String()) 189 level.Info(logger).Log("msg", "running replication attempt") 190 191 if err := newReplicationScheme(logger, metrics, blockFilter, fetcher, fromBkt, toBkt, reg).execute(ctx); err != nil { 192 return errors.Wrap(err, "replication execute") 193 } 194 195 return nil 196 } 197 198 g.Add(func() error { 199 defer runutil.CloseWithLogOnErr(logger, fromBkt, "from bucket client") 200 defer runutil.CloseWithLogOnErr(logger, toBkt, "to bucket client") 201 202 statusProber.Ready() 203 if singleRun || len(blockIDs) > 0 { 204 return replicateFn() 205 } 206 207 return runutil.Repeat(time.Minute, ctx.Done(), func() error { 208 start := time.Now() 209 if err := replicateFn(); err != nil { 210 level.Error(logger).Log("msg", "running replication failed", "err", err) 211 replicationRunCounter.WithLabelValues(labelError).Inc() 212 replicationRunDuration.WithLabelValues(labelError).Observe(time.Since(start).Seconds()) 213 214 // No matter the error we want to repeat indefinitely. 215 return nil 216 } 217 replicationRunCounter.WithLabelValues(labelSuccess).Inc() 218 replicationRunDuration.WithLabelValues(labelSuccess).Observe(time.Since(start).Seconds()) 219 level.Info(logger).Log("msg", "ran replication successfully") 220 221 return nil 222 }) 223 }, func(error) { 224 cancel() 225 }) 226 227 level.Info(logger).Log("msg", "starting replication") 228 229 return nil 230 } 231 232 func newMetaFetcher( 233 logger log.Logger, 234 fromBkt objstore.InstrumentedBucket, 235 reg prometheus.Registerer, 236 minTime, 237 maxTime thanosmodel.TimeOrDurationValue, 238 concurrency int, 239 ignoreMarkedForDeletion bool, 240 ) (*thanosblock.MetaFetcher, error) { 241 filters := []thanosblock.MetadataFilter{ 242 thanosblock.NewTimePartitionMetaFilter(minTime, maxTime), 243 } 244 if ignoreMarkedForDeletion { 245 filters = append(filters, thanosblock.NewIgnoreDeletionMarkFilter(logger, fromBkt, 0, concurrency)) 246 } 247 return thanosblock.NewMetaFetcher( 248 logger, 249 concurrency, 250 fromBkt, 251 "", 252 reg, 253 filters, 254 ) 255 }