github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/cortex/modules.go (about) 1 package cortex 2 3 import ( 4 "context" 5 "flag" 6 "fmt" 7 "net/http" 8 "os" 9 "time" 10 11 "github.com/go-kit/log/level" 12 "github.com/grafana/dskit/kv/codec" 13 "github.com/grafana/dskit/kv/memberlist" 14 "github.com/grafana/dskit/modules" 15 "github.com/grafana/dskit/ring" 16 "github.com/grafana/dskit/runtimeconfig" 17 "github.com/grafana/dskit/services" 18 "github.com/opentracing-contrib/go-stdlib/nethttp" 19 "github.com/opentracing/opentracing-go" 20 "github.com/pkg/errors" 21 "github.com/prometheus/client_golang/prometheus" 22 "github.com/prometheus/prometheus/promql" 23 "github.com/prometheus/prometheus/rules" 24 prom_storage "github.com/prometheus/prometheus/storage" 25 "github.com/thanos-io/thanos/pkg/discovery/dns" 26 httpgrpc_server "github.com/weaveworks/common/httpgrpc/server" 27 "github.com/weaveworks/common/server" 28 29 "github.com/cortexproject/cortex/pkg/alertmanager" 30 "github.com/cortexproject/cortex/pkg/alertmanager/alertstore" 31 "github.com/cortexproject/cortex/pkg/api" 32 "github.com/cortexproject/cortex/pkg/chunk" 33 "github.com/cortexproject/cortex/pkg/chunk/purger" 34 "github.com/cortexproject/cortex/pkg/chunk/storage" 35 "github.com/cortexproject/cortex/pkg/compactor" 36 configAPI "github.com/cortexproject/cortex/pkg/configs/api" 37 "github.com/cortexproject/cortex/pkg/configs/db" 38 "github.com/cortexproject/cortex/pkg/distributor" 39 "github.com/cortexproject/cortex/pkg/flusher" 40 "github.com/cortexproject/cortex/pkg/frontend" 41 "github.com/cortexproject/cortex/pkg/frontend/transport" 42 "github.com/cortexproject/cortex/pkg/ingester" 43 "github.com/cortexproject/cortex/pkg/querier" 44 "github.com/cortexproject/cortex/pkg/querier/queryrange" 45 "github.com/cortexproject/cortex/pkg/querier/tenantfederation" 46 querier_worker "github.com/cortexproject/cortex/pkg/querier/worker" 47 "github.com/cortexproject/cortex/pkg/ruler" 48 "github.com/cortexproject/cortex/pkg/scheduler" 49 "github.com/cortexproject/cortex/pkg/storegateway" 50 util_log "github.com/cortexproject/cortex/pkg/util/log" 51 "github.com/cortexproject/cortex/pkg/util/validation" 52 ) 53 54 // The various modules that make up Cortex. 55 const ( 56 API string = "api" 57 Ring string = "ring" 58 RuntimeConfig string = "runtime-config" 59 Overrides string = "overrides" 60 OverridesExporter string = "overrides-exporter" 61 Server string = "server" 62 Distributor string = "distributor" 63 DistributorService string = "distributor-service" 64 Ingester string = "ingester" 65 IngesterService string = "ingester-service" 66 Flusher string = "flusher" 67 Querier string = "querier" 68 Queryable string = "queryable" 69 StoreQueryable string = "store-queryable" 70 QueryFrontend string = "query-frontend" 71 QueryFrontendTripperware string = "query-frontend-tripperware" 72 Store string = "store" 73 DeleteRequestsStore string = "delete-requests-store" 74 TableManager string = "table-manager" 75 RulerStorage string = "ruler-storage" 76 Ruler string = "ruler" 77 Configs string = "configs" 78 AlertManager string = "alertmanager" 79 Compactor string = "compactor" 80 StoreGateway string = "store-gateway" 81 MemberlistKV string = "memberlist-kv" 82 ChunksPurger string = "chunks-purger" 83 TenantDeletion string = "tenant-deletion" 84 Purger string = "purger" 85 QueryScheduler string = "query-scheduler" 86 TenantFederation string = "tenant-federation" 87 All string = "all" 88 ) 89 90 func newDefaultConfig() *Config { 91 defaultConfig := &Config{} 92 defaultFS := flag.NewFlagSet("", flag.PanicOnError) 93 defaultConfig.RegisterFlags(defaultFS) 94 return defaultConfig 95 } 96 97 func (t *Cortex) initAPI() (services.Service, error) { 98 t.Cfg.API.ServerPrefix = t.Cfg.Server.PathPrefix 99 t.Cfg.API.LegacyHTTPPrefix = t.Cfg.HTTPPrefix 100 101 a, err := api.New(t.Cfg.API, t.Cfg.Server, t.Server, util_log.Logger) 102 if err != nil { 103 return nil, err 104 } 105 106 t.API = a 107 t.API.RegisterAPI(t.Cfg.Server.PathPrefix, t.Cfg, newDefaultConfig()) 108 109 return nil, nil 110 } 111 112 func (t *Cortex) initServer() (services.Service, error) { 113 // Cortex handles signals on its own. 114 DisableSignalHandling(&t.Cfg.Server) 115 serv, err := server.New(t.Cfg.Server) 116 if err != nil { 117 return nil, err 118 } 119 120 t.Server = serv 121 122 servicesToWaitFor := func() []services.Service { 123 svs := []services.Service(nil) 124 for m, s := range t.ServiceMap { 125 // Server should not wait for itself. 126 if m != Server { 127 svs = append(svs, s) 128 } 129 } 130 return svs 131 } 132 133 s := NewServerService(t.Server, servicesToWaitFor) 134 135 return s, nil 136 } 137 138 func (t *Cortex) initRing() (serv services.Service, err error) { 139 t.Cfg.Ingester.LifecyclerConfig.RingConfig.KVStore.Multi.ConfigProvider = multiClientRuntimeConfigChannel(t.RuntimeConfig) 140 t.Ring, err = ring.New(t.Cfg.Ingester.LifecyclerConfig.RingConfig, "ingester", ring.IngesterRingKey, util_log.Logger, prometheus.WrapRegistererWithPrefix("cortex_", prometheus.DefaultRegisterer)) 141 if err != nil { 142 return nil, err 143 } 144 145 t.API.RegisterRing(t.Ring) 146 147 return t.Ring, nil 148 } 149 150 func (t *Cortex) initRuntimeConfig() (services.Service, error) { 151 if t.Cfg.RuntimeConfig.LoadPath == "" { 152 // no need to initialize module if load path is empty 153 return nil, nil 154 } 155 t.Cfg.RuntimeConfig.Loader = loadRuntimeConfig 156 157 // make sure to set default limits before we start loading configuration into memory 158 validation.SetDefaultLimitsForYAMLUnmarshalling(t.Cfg.LimitsConfig) 159 160 serv, err := runtimeconfig.New(t.Cfg.RuntimeConfig, prometheus.WrapRegistererWithPrefix("cortex_", prometheus.DefaultRegisterer), util_log.Logger) 161 if err == nil { 162 // TenantLimits just delegates to RuntimeConfig and doesn't have any state or need to do 163 // anything in the start/stopping phase. Thus we can create it as part of runtime config 164 // setup without any service instance of its own. 165 t.TenantLimits = newTenantLimits(serv) 166 } 167 168 t.RuntimeConfig = serv 169 t.API.RegisterRuntimeConfig(runtimeConfigHandler(t.RuntimeConfig, t.Cfg.LimitsConfig)) 170 return serv, err 171 } 172 173 func (t *Cortex) initOverrides() (serv services.Service, err error) { 174 t.Overrides, err = validation.NewOverrides(t.Cfg.LimitsConfig, t.TenantLimits) 175 // overrides don't have operational state, nor do they need to do anything more in starting/stopping phase, 176 // so there is no need to return any service. 177 return nil, err 178 } 179 180 func (t *Cortex) initOverridesExporter() (services.Service, error) { 181 if t.Cfg.isModuleEnabled(OverridesExporter) && t.TenantLimits == nil { 182 // This target isn't enabled by default ("all") and requires per-tenant limits to 183 // work. Fail if it can't be setup correctly since the user explicitly wanted this 184 // target to run. 185 return nil, errors.New("overrides-exporter has been enabled, but no runtime configuration file was configured") 186 } 187 188 exporter := validation.NewOverridesExporter(t.TenantLimits) 189 prometheus.MustRegister(exporter) 190 191 // the overrides exporter has no state and reads overrides for runtime configuration each time it 192 // is collected so there is no need to return any service 193 return nil, nil 194 } 195 196 func (t *Cortex) initDistributorService() (serv services.Service, err error) { 197 t.Cfg.Distributor.DistributorRing.ListenPort = t.Cfg.Server.GRPCListenPort 198 t.Cfg.Distributor.ShuffleShardingLookbackPeriod = t.Cfg.Querier.ShuffleShardingIngestersLookbackPeriod 199 200 // Check whether the distributor can join the distributors ring, which is 201 // whenever it's not running as an internal dependency (ie. querier or 202 // ruler's dependency) 203 canJoinDistributorsRing := t.Cfg.isModuleEnabled(Distributor) || t.Cfg.isModuleEnabled(All) 204 205 t.Distributor, err = distributor.New(t.Cfg.Distributor, t.Cfg.IngesterClient, t.Overrides, t.Ring, canJoinDistributorsRing, prometheus.DefaultRegisterer, util_log.Logger) 206 if err != nil { 207 return 208 } 209 210 return t.Distributor, nil 211 } 212 213 func (t *Cortex) initDistributor() (serv services.Service, err error) { 214 t.API.RegisterDistributor(t.Distributor, t.Cfg.Distributor) 215 216 return nil, nil 217 } 218 219 // initQueryable instantiates the queryable and promQL engine used to service queries to 220 // Cortex. It also registers the API endpoints associated with those two services. 221 func (t *Cortex) initQueryable() (serv services.Service, err error) { 222 querierRegisterer := prometheus.WrapRegistererWith(prometheus.Labels{"engine": "querier"}, prometheus.DefaultRegisterer) 223 224 // Create a querier queryable and PromQL engine 225 t.QuerierQueryable, t.ExemplarQueryable, t.QuerierEngine = querier.New(t.Cfg.Querier, t.Overrides, t.Distributor, t.StoreQueryables, t.TombstonesLoader, querierRegisterer, util_log.Logger) 226 227 // Register the default endpoints that are always enabled for the querier module 228 t.API.RegisterQueryable(t.QuerierQueryable, t.Distributor) 229 230 return nil, nil 231 } 232 233 // Enable merge querier if multi tenant query federation is enabled 234 func (t *Cortex) initTenantFederation() (serv services.Service, err error) { 235 if t.Cfg.TenantFederation.Enabled { 236 // Make sure the mergeQuerier is only used for request with more than a 237 // single tenant. This allows for a less impactful enabling of tenant 238 // federation. 239 byPassForSingleQuerier := true 240 t.QuerierQueryable = querier.NewSampleAndChunkQueryable(tenantfederation.NewQueryable(t.QuerierQueryable, byPassForSingleQuerier)) 241 } 242 return nil, nil 243 } 244 245 // initQuerier registers an internal HTTP router with a Prometheus API backed by the 246 // Cortex Queryable. Then it does one of the following: 247 // 248 // 1. Query-Frontend Enabled: If Cortex has an All or QueryFrontend target, the internal 249 // HTTP router is wrapped with Tenant ID parsing middleware and passed to the frontend 250 // worker. 251 // 252 // 2. Querier Standalone: The querier will register the internal HTTP router with the external 253 // HTTP router for the Prometheus API routes. Then the external HTTP server will be passed 254 // as a http.Handler to the frontend worker. 255 // 256 // Route Diagram: 257 // 258 // │ query 259 // │ request 260 // │ 261 // ▼ 262 // ┌──────────────────┐ QF to ┌──────────────────┐ 263 // │ external HTTP │ Worker │ │ 264 // │ router │──────────────▶│ frontend worker │ 265 // │ │ │ │ 266 // └──────────────────┘ └──────────────────┘ 267 // │ │ 268 // │ 269 // only in │ │ 270 // microservice ┌──────────────────┐ │ 271 // querier │ │ internal Querier │ │ 272 // ─ ─ ─ ─▶│ router │◀─────┘ 273 // │ │ 274 // └──────────────────┘ 275 // │ 276 // │ 277 // /metadata & /chunk ┌─────────────────────┼─────────────────────┐ 278 // requests │ │ │ 279 // │ │ │ 280 // ▼ ▼ ▼ 281 // ┌──────────────────┐ ┌──────────────────┐ ┌──────────────────┐ 282 // │ │ │ │ │ │ 283 // │Querier Queryable │ │ /api/v1 router │ │ /api/prom router │ 284 // │ │ │ │ │ │ 285 // └──────────────────┘ └──────────────────┘ └──────────────────┘ 286 // ▲ │ │ 287 // │ └──────────┬──────────┘ 288 // │ ▼ 289 // │ ┌──────────────────┐ 290 // │ │ │ 291 // └──────────────────────│ Prometheus API │ 292 // │ │ 293 // └──────────────────┘ 294 // 295 func (t *Cortex) initQuerier() (serv services.Service, err error) { 296 // Create a internal HTTP handler that is configured with the Prometheus API routes and points 297 // to a Prometheus API struct instantiated with the Cortex Queryable. 298 internalQuerierRouter := api.NewQuerierHandler( 299 t.Cfg.API, 300 t.QuerierQueryable, 301 t.ExemplarQueryable, 302 t.QuerierEngine, 303 t.Distributor, 304 t.TombstonesLoader, 305 prometheus.DefaultRegisterer, 306 util_log.Logger, 307 ) 308 309 // If the querier is running standalone without the query-frontend or query-scheduler, we must register it's internal 310 // HTTP handler externally and provide the external Cortex Server HTTP handler to the frontend worker 311 // to ensure requests it processes use the default middleware instrumentation. 312 if !t.Cfg.isModuleEnabled(QueryFrontend) && !t.Cfg.isModuleEnabled(QueryScheduler) && !t.Cfg.isModuleEnabled(All) { 313 // First, register the internal querier handler with the external HTTP server 314 t.API.RegisterQueryAPI(internalQuerierRouter) 315 316 // Second, set the http.Handler that the frontend worker will use to process requests to point to 317 // the external HTTP server. This will allow the querier to consolidate query metrics both external 318 // and internal using the default instrumentation when running as a standalone service. 319 internalQuerierRouter = t.Server.HTTPServer.Handler 320 } else { 321 // Single binary mode requires a query frontend endpoint for the worker. If no frontend and scheduler endpoint 322 // is configured, Cortex will default to using frontend on localhost on it's own GRPC listening port. 323 if t.Cfg.Worker.FrontendAddress == "" && t.Cfg.Worker.SchedulerAddress == "" { 324 address := fmt.Sprintf("127.0.0.1:%d", t.Cfg.Server.GRPCListenPort) 325 level.Warn(util_log.Logger).Log("msg", "Worker address is empty in single binary mode. Attempting automatic worker configuration. If queries are unresponsive consider configuring the worker explicitly.", "address", address) 326 t.Cfg.Worker.FrontendAddress = address 327 } 328 329 // Add a middleware to extract the trace context and add a header. 330 internalQuerierRouter = nethttp.MiddlewareFunc(opentracing.GlobalTracer(), internalQuerierRouter.ServeHTTP, nethttp.OperationNameFunc(func(r *http.Request) string { 331 return "internalQuerier" 332 })) 333 334 // If queries are processed using the external HTTP Server, we need wrap the internal querier with 335 // HTTP router with middleware to parse the tenant ID from the HTTP header and inject it into the 336 // request context. 337 internalQuerierRouter = t.API.AuthMiddleware.Wrap(internalQuerierRouter) 338 } 339 340 // If neither frontend address or scheduler address is configured, no worker is needed. 341 if t.Cfg.Worker.FrontendAddress == "" && t.Cfg.Worker.SchedulerAddress == "" { 342 return nil, nil 343 } 344 345 t.Cfg.Worker.MaxConcurrentRequests = t.Cfg.Querier.MaxConcurrent 346 return querier_worker.NewQuerierWorker(t.Cfg.Worker, httpgrpc_server.NewServer(internalQuerierRouter), util_log.Logger, prometheus.DefaultRegisterer) 347 } 348 349 func (t *Cortex) initStoreQueryables() (services.Service, error) { 350 var servs []services.Service 351 352 //nolint:golint // I prefer this form over removing 'else', because it allows q to have smaller scope. 353 if q, err := initQueryableForEngine(t.Cfg.Storage.Engine, t.Cfg, t.Store, t.Overrides, prometheus.DefaultRegisterer); err != nil { 354 return nil, fmt.Errorf("failed to initialize querier for engine '%s': %v", t.Cfg.Storage.Engine, err) 355 } else { 356 t.StoreQueryables = append(t.StoreQueryables, querier.UseAlwaysQueryable(q)) 357 if s, ok := q.(services.Service); ok { 358 servs = append(servs, s) 359 } 360 } 361 362 if t.Cfg.Querier.SecondStoreEngine != "" { 363 if t.Cfg.Querier.SecondStoreEngine == t.Cfg.Storage.Engine { 364 return nil, fmt.Errorf("second store engine used by querier '%s' must be different than primary engine '%s'", t.Cfg.Querier.SecondStoreEngine, t.Cfg.Storage.Engine) 365 } 366 367 sq, err := initQueryableForEngine(t.Cfg.Querier.SecondStoreEngine, t.Cfg, t.Store, t.Overrides, prometheus.DefaultRegisterer) 368 if err != nil { 369 return nil, fmt.Errorf("failed to initialize querier for engine '%s': %v", t.Cfg.Querier.SecondStoreEngine, err) 370 } 371 372 t.StoreQueryables = append(t.StoreQueryables, querier.UseBeforeTimestampQueryable(sq, time.Time(t.Cfg.Querier.UseSecondStoreBeforeTime))) 373 374 if s, ok := sq.(services.Service); ok { 375 servs = append(servs, s) 376 } 377 } 378 379 // Return service, if any. 380 switch len(servs) { 381 case 0: 382 return nil, nil 383 case 1: 384 return servs[0], nil 385 default: 386 // No need to support this case yet, since chunk store is not a service. 387 // When we get there, we will need a wrapper service, that starts all subservices, and will also monitor them for failures. 388 // Not difficult, but also not necessary right now. 389 return nil, fmt.Errorf("too many services") 390 } 391 } 392 393 func initQueryableForEngine(engine string, cfg Config, chunkStore chunk.Store, limits *validation.Overrides, reg prometheus.Registerer) (prom_storage.Queryable, error) { 394 switch engine { 395 case storage.StorageEngineChunks: 396 if chunkStore == nil { 397 return nil, fmt.Errorf("chunk store not initialized") 398 } 399 return querier.NewChunkStoreQueryable(cfg.Querier, chunkStore), nil 400 401 case storage.StorageEngineBlocks: 402 // When running in single binary, if the blocks sharding is disabled and no custom 403 // store-gateway address has been configured, we can set it to the running process. 404 if cfg.isModuleEnabled(All) && !cfg.StoreGateway.ShardingEnabled && cfg.Querier.StoreGatewayAddresses == "" { 405 cfg.Querier.StoreGatewayAddresses = fmt.Sprintf("127.0.0.1:%d", cfg.Server.GRPCListenPort) 406 } 407 408 return querier.NewBlocksStoreQueryableFromConfig(cfg.Querier, cfg.StoreGateway, cfg.BlocksStorage, limits, util_log.Logger, reg) 409 410 default: 411 return nil, fmt.Errorf("unknown storage engine '%s'", engine) 412 } 413 } 414 415 func (t *Cortex) tsdbIngesterConfig() { 416 t.Cfg.Ingester.BlocksStorageEnabled = t.Cfg.Storage.Engine == storage.StorageEngineBlocks 417 t.Cfg.Ingester.BlocksStorageConfig = t.Cfg.BlocksStorage 418 } 419 420 func (t *Cortex) initIngesterService() (serv services.Service, err error) { 421 t.Cfg.Ingester.LifecyclerConfig.RingConfig.KVStore.Multi.ConfigProvider = multiClientRuntimeConfigChannel(t.RuntimeConfig) 422 t.Cfg.Ingester.LifecyclerConfig.ListenPort = t.Cfg.Server.GRPCListenPort 423 t.Cfg.Ingester.DistributorShardingStrategy = t.Cfg.Distributor.ShardingStrategy 424 t.Cfg.Ingester.DistributorShardByAllLabels = t.Cfg.Distributor.ShardByAllLabels 425 t.Cfg.Ingester.StreamTypeFn = ingesterChunkStreaming(t.RuntimeConfig) 426 t.Cfg.Ingester.InstanceLimitsFn = ingesterInstanceLimits(t.RuntimeConfig) 427 t.tsdbIngesterConfig() 428 429 t.Ingester, err = ingester.New(t.Cfg.Ingester, t.Cfg.IngesterClient, t.Overrides, t.Store, prometheus.DefaultRegisterer, util_log.Logger) 430 if err != nil { 431 return 432 } 433 434 return t.Ingester, nil 435 } 436 437 func (t *Cortex) initIngester() (serv services.Service, err error) { 438 t.API.RegisterIngester(t.Ingester, t.Cfg.Distributor) 439 440 return nil, nil 441 } 442 443 func (t *Cortex) initFlusher() (serv services.Service, err error) { 444 t.tsdbIngesterConfig() 445 446 t.Flusher, err = flusher.New( 447 t.Cfg.Flusher, 448 t.Cfg.Ingester, 449 t.Store, 450 t.Overrides, 451 prometheus.DefaultRegisterer, 452 util_log.Logger, 453 ) 454 if err != nil { 455 return 456 } 457 458 return t.Flusher, nil 459 } 460 461 func (t *Cortex) initChunkStore() (serv services.Service, err error) { 462 if t.Cfg.Storage.Engine != storage.StorageEngineChunks && t.Cfg.Querier.SecondStoreEngine != storage.StorageEngineChunks { 463 return nil, nil 464 } 465 err = t.Cfg.Schema.Load() 466 if err != nil { 467 return 468 } 469 470 t.Store, err = storage.NewStore(t.Cfg.Storage, t.Cfg.ChunkStore, t.Cfg.Schema, t.Overrides, prometheus.DefaultRegisterer, t.TombstonesLoader, util_log.Logger) 471 if err != nil { 472 return 473 } 474 475 return services.NewIdleService(nil, func(_ error) error { 476 t.Store.Stop() 477 return nil 478 }), nil 479 } 480 481 func (t *Cortex) initDeleteRequestsStore() (serv services.Service, err error) { 482 if t.Cfg.Storage.Engine != storage.StorageEngineChunks || !t.Cfg.PurgerConfig.Enable { 483 // until we need to explicitly enable delete series support we need to do create TombstonesLoader without DeleteStore which acts as noop 484 t.TombstonesLoader = purger.NewTombstonesLoader(nil, nil) 485 486 return 487 } 488 489 var indexClient chunk.IndexClient 490 reg := prometheus.WrapRegistererWith( 491 prometheus.Labels{"component": DeleteRequestsStore}, prometheus.DefaultRegisterer) 492 indexClient, err = storage.NewIndexClient(t.Cfg.Storage.DeleteStoreConfig.Store, t.Cfg.Storage, t.Cfg.Schema, reg) 493 if err != nil { 494 return 495 } 496 497 t.DeletesStore, err = purger.NewDeleteStore(t.Cfg.Storage.DeleteStoreConfig, indexClient) 498 if err != nil { 499 return 500 } 501 502 t.TombstonesLoader = purger.NewTombstonesLoader(t.DeletesStore, prometheus.DefaultRegisterer) 503 504 return 505 } 506 507 // initQueryFrontendTripperware instantiates the tripperware used by the query frontend 508 // to optimize Prometheus query requests. 509 func (t *Cortex) initQueryFrontendTripperware() (serv services.Service, err error) { 510 // Load the schema only if sharded queries is set. 511 if t.Cfg.QueryRange.ShardedQueries { 512 err := t.Cfg.Schema.Load() 513 if err != nil { 514 return nil, err 515 } 516 } 517 518 tripperware, cache, err := queryrange.NewTripperware( 519 t.Cfg.QueryRange, 520 util_log.Logger, 521 t.Overrides, 522 queryrange.PrometheusCodec, 523 queryrange.PrometheusResponseExtractor{}, 524 t.Cfg.Schema, 525 promql.EngineOpts{ 526 Logger: util_log.Logger, 527 Reg: prometheus.DefaultRegisterer, 528 MaxSamples: t.Cfg.Querier.MaxSamples, 529 Timeout: t.Cfg.Querier.Timeout, 530 EnableAtModifier: t.Cfg.Querier.AtModifierEnabled, 531 NoStepSubqueryIntervalFn: func(int64) int64 { 532 return t.Cfg.Querier.DefaultEvaluationInterval.Milliseconds() 533 }, 534 }, 535 t.Cfg.Querier.QueryIngestersWithin, 536 prometheus.DefaultRegisterer, 537 t.TombstonesLoader, 538 ) 539 540 if err != nil { 541 return nil, err 542 } 543 544 t.QueryFrontendTripperware = tripperware 545 546 return services.NewIdleService(nil, func(_ error) error { 547 if cache != nil { 548 cache.Stop() 549 cache = nil 550 } 551 return nil 552 }), nil 553 } 554 555 func (t *Cortex) initQueryFrontend() (serv services.Service, err error) { 556 roundTripper, frontendV1, frontendV2, err := frontend.InitFrontend(t.Cfg.Frontend, t.Overrides, t.Cfg.Server.GRPCListenPort, util_log.Logger, prometheus.DefaultRegisterer) 557 if err != nil { 558 return nil, err 559 } 560 561 // Wrap roundtripper into Tripperware. 562 roundTripper = t.QueryFrontendTripperware(roundTripper) 563 564 handler := transport.NewHandler(t.Cfg.Frontend.Handler, roundTripper, util_log.Logger, prometheus.DefaultRegisterer) 565 t.API.RegisterQueryFrontendHandler(handler) 566 567 if frontendV1 != nil { 568 t.API.RegisterQueryFrontend1(frontendV1) 569 t.Frontend = frontendV1 570 571 return frontendV1, nil 572 } else if frontendV2 != nil { 573 t.API.RegisterQueryFrontend2(frontendV2) 574 575 return frontendV2, nil 576 } 577 578 return nil, nil 579 } 580 581 func (t *Cortex) initTableManager() (services.Service, error) { 582 if t.Cfg.Storage.Engine == storage.StorageEngineBlocks { 583 return nil, nil // table manager isn't used in v2 584 } 585 586 err := t.Cfg.Schema.Load() 587 if err != nil { 588 return nil, err 589 } 590 591 // Assume the newest config is the one to use 592 lastConfig := &t.Cfg.Schema.Configs[len(t.Cfg.Schema.Configs)-1] 593 594 if (t.Cfg.TableManager.ChunkTables.WriteScale.Enabled || 595 t.Cfg.TableManager.IndexTables.WriteScale.Enabled || 596 t.Cfg.TableManager.ChunkTables.InactiveWriteScale.Enabled || 597 t.Cfg.TableManager.IndexTables.InactiveWriteScale.Enabled || 598 t.Cfg.TableManager.ChunkTables.ReadScale.Enabled || 599 t.Cfg.TableManager.IndexTables.ReadScale.Enabled || 600 t.Cfg.TableManager.ChunkTables.InactiveReadScale.Enabled || 601 t.Cfg.TableManager.IndexTables.InactiveReadScale.Enabled) && 602 t.Cfg.Storage.AWSStorageConfig.Metrics.URL == "" { 603 level.Error(util_log.Logger).Log("msg", "WriteScale is enabled but no Metrics URL has been provided") 604 os.Exit(1) 605 } 606 607 reg := prometheus.WrapRegistererWith( 608 prometheus.Labels{"component": "table-manager-store"}, prometheus.DefaultRegisterer) 609 610 tableClient, err := storage.NewTableClient(lastConfig.IndexType, t.Cfg.Storage, reg) 611 if err != nil { 612 return nil, err 613 } 614 615 bucketClient, err := storage.NewBucketClient(t.Cfg.Storage) 616 util_log.CheckFatal("initializing bucket client", err) 617 618 var extraTables []chunk.ExtraTables 619 if t.Cfg.PurgerConfig.Enable { 620 reg := prometheus.WrapRegistererWith( 621 prometheus.Labels{"component": "table-manager-" + DeleteRequestsStore}, prometheus.DefaultRegisterer) 622 623 deleteStoreTableClient, err := storage.NewTableClient(t.Cfg.Storage.DeleteStoreConfig.Store, t.Cfg.Storage, reg) 624 if err != nil { 625 return nil, err 626 } 627 628 extraTables = append(extraTables, chunk.ExtraTables{TableClient: deleteStoreTableClient, Tables: t.Cfg.Storage.DeleteStoreConfig.GetTables()}) 629 } 630 631 t.TableManager, err = chunk.NewTableManager(t.Cfg.TableManager, t.Cfg.Schema, t.Cfg.Ingester.MaxChunkAge, tableClient, 632 bucketClient, extraTables, prometheus.DefaultRegisterer) 633 return t.TableManager, err 634 } 635 636 func (t *Cortex) initRulerStorage() (serv services.Service, err error) { 637 // if the ruler is not configured and we're in single binary then let's just log an error and continue. 638 // unfortunately there is no way to generate a "default" config and compare default against actual 639 // to determine if it's unconfigured. the following check, however, correctly tests this. 640 // Single binary integration tests will break if this ever drifts 641 if t.Cfg.isModuleEnabled(All) && t.Cfg.Ruler.StoreConfig.IsDefaults() && t.Cfg.RulerStorage.IsDefaults() { 642 level.Info(util_log.Logger).Log("msg", "Ruler storage is not configured in single binary mode and will not be started.") 643 return 644 } 645 646 if !t.Cfg.Ruler.StoreConfig.IsDefaults() { 647 t.RulerStorage, err = ruler.NewLegacyRuleStore(t.Cfg.Ruler.StoreConfig, rules.FileLoader{}, util_log.Logger) 648 } else { 649 t.RulerStorage, err = ruler.NewRuleStore(context.Background(), t.Cfg.RulerStorage, t.Overrides, rules.FileLoader{}, util_log.Logger, prometheus.DefaultRegisterer) 650 } 651 return 652 } 653 654 func (t *Cortex) initRuler() (serv services.Service, err error) { 655 if t.RulerStorage == nil { 656 level.Info(util_log.Logger).Log("msg", "RulerStorage is nil. Not starting the ruler.") 657 return nil, nil 658 } 659 660 t.Cfg.Ruler.Ring.ListenPort = t.Cfg.Server.GRPCListenPort 661 rulerRegisterer := prometheus.WrapRegistererWith(prometheus.Labels{"engine": "ruler"}, prometheus.DefaultRegisterer) 662 // TODO: Consider wrapping logger to differentiate from querier module logger 663 queryable, _, engine := querier.New(t.Cfg.Querier, t.Overrides, t.Distributor, t.StoreQueryables, t.TombstonesLoader, rulerRegisterer, util_log.Logger) 664 665 managerFactory := ruler.DefaultTenantManagerFactory(t.Cfg.Ruler, t.Distributor, queryable, engine, t.Overrides, prometheus.DefaultRegisterer) 666 manager, err := ruler.NewDefaultMultiTenantManager(t.Cfg.Ruler, managerFactory, prometheus.DefaultRegisterer, util_log.Logger) 667 if err != nil { 668 return nil, err 669 } 670 671 t.Ruler, err = ruler.NewRuler( 672 t.Cfg.Ruler, 673 manager, 674 prometheus.DefaultRegisterer, 675 util_log.Logger, 676 t.RulerStorage, 677 t.Overrides, 678 ) 679 if err != nil { 680 return 681 } 682 683 // Expose HTTP/GRPC endpoints for the Ruler service 684 t.API.RegisterRuler(t.Ruler) 685 686 // If the API is enabled, register the Ruler API 687 if t.Cfg.Ruler.EnableAPI { 688 t.API.RegisterRulerAPI(ruler.NewAPI(t.Ruler, t.RulerStorage, util_log.Logger)) 689 } 690 691 return t.Ruler, nil 692 } 693 694 func (t *Cortex) initConfig() (serv services.Service, err error) { 695 t.ConfigDB, err = db.New(t.Cfg.Configs.DB) 696 if err != nil { 697 return 698 } 699 700 t.ConfigAPI = configAPI.New(t.ConfigDB, t.Cfg.Configs.API) 701 t.ConfigAPI.RegisterRoutes(t.Server.HTTP) 702 return services.NewIdleService(nil, func(_ error) error { 703 t.ConfigDB.Close() 704 return nil 705 }), nil 706 } 707 708 func (t *Cortex) initAlertManager() (serv services.Service, err error) { 709 t.Cfg.Alertmanager.ShardingRing.ListenPort = t.Cfg.Server.GRPCListenPort 710 711 // Initialise the store. 712 var store alertstore.AlertStore 713 if !t.Cfg.Alertmanager.Store.IsDefaults() { 714 store, err = alertstore.NewLegacyAlertStore(t.Cfg.Alertmanager.Store, util_log.Logger) 715 } else { 716 store, err = alertstore.NewAlertStore(context.Background(), t.Cfg.AlertmanagerStorage, t.Overrides, util_log.Logger, prometheus.DefaultRegisterer) 717 } 718 if err != nil { 719 return 720 } 721 722 t.Alertmanager, err = alertmanager.NewMultitenantAlertmanager(&t.Cfg.Alertmanager, store, t.Overrides, util_log.Logger, prometheus.DefaultRegisterer) 723 if err != nil { 724 return 725 } 726 727 t.API.RegisterAlertmanager(t.Alertmanager, t.Cfg.isModuleEnabled(AlertManager), t.Cfg.Alertmanager.EnableAPI) 728 return t.Alertmanager, nil 729 } 730 731 func (t *Cortex) initCompactor() (serv services.Service, err error) { 732 t.Cfg.Compactor.ShardingRing.ListenPort = t.Cfg.Server.GRPCListenPort 733 734 t.Compactor, err = compactor.NewCompactor(t.Cfg.Compactor, t.Cfg.BlocksStorage, t.Overrides, util_log.Logger, prometheus.DefaultRegisterer) 735 if err != nil { 736 return 737 } 738 739 // Expose HTTP endpoints. 740 t.API.RegisterCompactor(t.Compactor) 741 return t.Compactor, nil 742 } 743 744 func (t *Cortex) initStoreGateway() (serv services.Service, err error) { 745 if t.Cfg.Storage.Engine != storage.StorageEngineBlocks { 746 if !t.Cfg.isModuleEnabled(All) { 747 return nil, fmt.Errorf("storage engine must be set to blocks to enable the store-gateway") 748 } 749 return nil, nil 750 } 751 752 t.Cfg.StoreGateway.ShardingRing.ListenPort = t.Cfg.Server.GRPCListenPort 753 754 t.StoreGateway, err = storegateway.NewStoreGateway(t.Cfg.StoreGateway, t.Cfg.BlocksStorage, t.Overrides, t.Cfg.Server.LogLevel, util_log.Logger, prometheus.DefaultRegisterer) 755 if err != nil { 756 return nil, err 757 } 758 759 // Expose HTTP endpoints. 760 t.API.RegisterStoreGateway(t.StoreGateway) 761 762 return t.StoreGateway, nil 763 } 764 765 func (t *Cortex) initMemberlistKV() (services.Service, error) { 766 reg := prometheus.DefaultRegisterer 767 t.Cfg.MemberlistKV.MetricsRegisterer = reg 768 t.Cfg.MemberlistKV.Codecs = []codec.Codec{ 769 ring.GetCodec(), 770 } 771 dnsProviderReg := prometheus.WrapRegistererWithPrefix( 772 "cortex_", 773 prometheus.WrapRegistererWith( 774 prometheus.Labels{"name": "memberlist"}, 775 reg, 776 ), 777 ) 778 dnsProvider := dns.NewProvider(util_log.Logger, dnsProviderReg, dns.GolangResolverType) 779 t.MemberlistKV = memberlist.NewKVInitService(&t.Cfg.MemberlistKV, util_log.Logger, dnsProvider, reg) 780 t.API.RegisterMemberlistKV(t.MemberlistKV) 781 782 // Update the config. 783 t.Cfg.Distributor.DistributorRing.KVStore.MemberlistKV = t.MemberlistKV.GetMemberlistKV 784 t.Cfg.Ingester.LifecyclerConfig.RingConfig.KVStore.MemberlistKV = t.MemberlistKV.GetMemberlistKV 785 t.Cfg.StoreGateway.ShardingRing.KVStore.MemberlistKV = t.MemberlistKV.GetMemberlistKV 786 t.Cfg.Compactor.ShardingRing.KVStore.MemberlistKV = t.MemberlistKV.GetMemberlistKV 787 t.Cfg.Ruler.Ring.KVStore.MemberlistKV = t.MemberlistKV.GetMemberlistKV 788 t.Cfg.Alertmanager.ShardingRing.KVStore.MemberlistKV = t.MemberlistKV.GetMemberlistKV 789 790 return t.MemberlistKV, nil 791 } 792 793 func (t *Cortex) initChunksPurger() (services.Service, error) { 794 if t.Cfg.Storage.Engine != storage.StorageEngineChunks || !t.Cfg.PurgerConfig.Enable { 795 return nil, nil 796 } 797 798 storageClient, err := storage.NewObjectClient(t.Cfg.PurgerConfig.ObjectStoreType, t.Cfg.Storage) 799 if err != nil { 800 return nil, err 801 } 802 803 t.Purger, err = purger.NewPurger(t.Cfg.PurgerConfig, t.DeletesStore, t.Store, storageClient, prometheus.DefaultRegisterer) 804 if err != nil { 805 return nil, err 806 } 807 808 t.API.RegisterChunksPurger(t.DeletesStore, t.Cfg.PurgerConfig.DeleteRequestCancelPeriod) 809 810 return t.Purger, nil 811 } 812 813 func (t *Cortex) initTenantDeletionAPI() (services.Service, error) { 814 if t.Cfg.Storage.Engine != storage.StorageEngineBlocks { 815 return nil, nil 816 } 817 818 // t.RulerStorage can be nil when running in single-binary mode, and rule storage is not configured. 819 tenantDeletionAPI, err := purger.NewTenantDeletionAPI(t.Cfg.BlocksStorage, t.Overrides, util_log.Logger, prometheus.DefaultRegisterer) 820 if err != nil { 821 return nil, err 822 } 823 824 t.API.RegisterTenantDeletion(tenantDeletionAPI) 825 return nil, nil 826 } 827 828 func (t *Cortex) initQueryScheduler() (services.Service, error) { 829 s, err := scheduler.NewScheduler(t.Cfg.QueryScheduler, t.Overrides, util_log.Logger, prometheus.DefaultRegisterer) 830 if err != nil { 831 return nil, errors.Wrap(err, "query-scheduler init") 832 } 833 834 t.API.RegisterQueryScheduler(s) 835 return s, nil 836 } 837 838 func (t *Cortex) setupModuleManager() error { 839 mm := modules.NewManager(util_log.Logger) 840 841 // Register all modules here. 842 // RegisterModule(name string, initFn func()(services.Service, error)) 843 mm.RegisterModule(Server, t.initServer, modules.UserInvisibleModule) 844 mm.RegisterModule(API, t.initAPI, modules.UserInvisibleModule) 845 mm.RegisterModule(RuntimeConfig, t.initRuntimeConfig, modules.UserInvisibleModule) 846 mm.RegisterModule(MemberlistKV, t.initMemberlistKV, modules.UserInvisibleModule) 847 mm.RegisterModule(Ring, t.initRing, modules.UserInvisibleModule) 848 mm.RegisterModule(Overrides, t.initOverrides, modules.UserInvisibleModule) 849 mm.RegisterModule(OverridesExporter, t.initOverridesExporter) 850 mm.RegisterModule(Distributor, t.initDistributor) 851 mm.RegisterModule(DistributorService, t.initDistributorService, modules.UserInvisibleModule) 852 mm.RegisterModule(Store, t.initChunkStore, modules.UserInvisibleModule) 853 mm.RegisterModule(DeleteRequestsStore, t.initDeleteRequestsStore, modules.UserInvisibleModule) 854 mm.RegisterModule(Ingester, t.initIngester) 855 mm.RegisterModule(IngesterService, t.initIngesterService, modules.UserInvisibleModule) 856 mm.RegisterModule(Flusher, t.initFlusher) 857 mm.RegisterModule(Queryable, t.initQueryable, modules.UserInvisibleModule) 858 mm.RegisterModule(Querier, t.initQuerier) 859 mm.RegisterModule(StoreQueryable, t.initStoreQueryables, modules.UserInvisibleModule) 860 mm.RegisterModule(QueryFrontendTripperware, t.initQueryFrontendTripperware, modules.UserInvisibleModule) 861 mm.RegisterModule(QueryFrontend, t.initQueryFrontend) 862 mm.RegisterModule(TableManager, t.initTableManager) 863 mm.RegisterModule(RulerStorage, t.initRulerStorage, modules.UserInvisibleModule) 864 mm.RegisterModule(Ruler, t.initRuler) 865 mm.RegisterModule(Configs, t.initConfig) 866 mm.RegisterModule(AlertManager, t.initAlertManager) 867 mm.RegisterModule(Compactor, t.initCompactor) 868 mm.RegisterModule(StoreGateway, t.initStoreGateway) 869 mm.RegisterModule(ChunksPurger, t.initChunksPurger, modules.UserInvisibleModule) 870 mm.RegisterModule(TenantDeletion, t.initTenantDeletionAPI, modules.UserInvisibleModule) 871 mm.RegisterModule(Purger, nil) 872 mm.RegisterModule(QueryScheduler, t.initQueryScheduler) 873 mm.RegisterModule(TenantFederation, t.initTenantFederation, modules.UserInvisibleModule) 874 mm.RegisterModule(All, nil) 875 876 // Add dependencies 877 deps := map[string][]string{ 878 API: {Server}, 879 MemberlistKV: {API}, 880 RuntimeConfig: {API}, 881 Ring: {API, RuntimeConfig, MemberlistKV}, 882 Overrides: {RuntimeConfig}, 883 OverridesExporter: {RuntimeConfig}, 884 Distributor: {DistributorService, API}, 885 DistributorService: {Ring, Overrides}, 886 Store: {Overrides, DeleteRequestsStore}, 887 Ingester: {IngesterService, API}, 888 IngesterService: {Overrides, Store, RuntimeConfig, MemberlistKV}, 889 Flusher: {Store, API}, 890 Queryable: {Overrides, DistributorService, Store, Ring, API, StoreQueryable, MemberlistKV}, 891 Querier: {TenantFederation}, 892 StoreQueryable: {Overrides, Store, MemberlistKV}, 893 QueryFrontendTripperware: {API, Overrides, DeleteRequestsStore}, 894 QueryFrontend: {QueryFrontendTripperware}, 895 QueryScheduler: {API, Overrides}, 896 TableManager: {API}, 897 Ruler: {DistributorService, Store, StoreQueryable, RulerStorage}, 898 RulerStorage: {Overrides}, 899 Configs: {API}, 900 AlertManager: {API, MemberlistKV, Overrides}, 901 Compactor: {API, MemberlistKV, Overrides}, 902 StoreGateway: {API, Overrides, MemberlistKV}, 903 ChunksPurger: {Store, DeleteRequestsStore, API}, 904 TenantDeletion: {Store, API, Overrides}, 905 Purger: {ChunksPurger, TenantDeletion}, 906 TenantFederation: {Queryable}, 907 All: {QueryFrontend, Querier, Ingester, Distributor, TableManager, Purger, StoreGateway, Ruler}, 908 } 909 for mod, targets := range deps { 910 if err := mm.AddDependency(mod, targets...); err != nil { 911 return err 912 } 913 } 914 915 t.ModuleManager = mm 916 917 return nil 918 }