github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/storage/stores/series/index/table_manager.go

package index

import (
	"context"
	"errors"
	"flag"
	"fmt"
	"math/rand"
	"sort"
	"strings"
	"time"

	"github.com/go-kit/log/level"
	"github.com/grafana/dskit/services"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
	"github.com/prometheus/common/model"
	tsdb_errors "github.com/prometheus/prometheus/tsdb/errors"
	"github.com/weaveworks/common/instrument"
	"github.com/weaveworks/common/mtime"

	"github.com/grafana/loki/pkg/storage/config"
	util_log "github.com/grafana/loki/pkg/util/log"
)

const (
	readLabel  = "read"
	writeLabel = "write"

	bucketRetentionEnforcementInterval = 12 * time.Hour
)

type tableManagerMetrics struct {
	syncTableDuration  *prometheus.HistogramVec
	tableCapacity      *prometheus.GaugeVec
	createFailures     prometheus.Gauge
	deleteFailures     prometheus.Gauge
	lastSuccessfulSync prometheus.Gauge
}

func newTableManagerMetrics(r prometheus.Registerer) *tableManagerMetrics {
	m := tableManagerMetrics{}
	m.syncTableDuration = promauto.With(r).NewHistogramVec(prometheus.HistogramOpts{
		Namespace: "loki",
		Name:      "table_manager_sync_duration_seconds",
		Help:      "Time spent synching tables.",
		Buckets:   prometheus.DefBuckets,
	}, []string{"operation", "status_code"})

	m.tableCapacity = promauto.With(r).NewGaugeVec(prometheus.GaugeOpts{
		Namespace: "loki",
		Name:      "table_capacity_units",
		Help:      "Per-table capacity, measured in DynamoDB capacity units.",
	}, []string{"op", "table"})

	m.createFailures = promauto.With(r).NewGauge(prometheus.GaugeOpts{
		Namespace: "loki",
		Name:      "table_manager_create_failures",
		Help:      "Number of table creation failures during the last table-manager reconciliation",
	})
	m.deleteFailures = promauto.With(r).NewGauge(prometheus.GaugeOpts{
		Namespace: "loki",
		Name:      "table_manager_delete_failures",
		Help:      "Number of table deletion failures during the last table-manager reconciliation",
	})

	m.lastSuccessfulSync = promauto.With(r).NewGauge(prometheus.GaugeOpts{
		Namespace: "loki",
		Name:      "table_manager_sync_success_timestamp_seconds",
		Help:      "Timestamp of the last successful table manager sync.",
	})

	return &m
}

// ExtraTables holds the list of tables that TableManager has to manage using a TableClient.
// This is useful for managing tables other than Chunk and Index tables.
type ExtraTables struct {
	TableClient TableClient
	Tables      []config.TableDesc
}

// TableManagerConfig holds config for a TableManager
type TableManagerConfig struct {
	// Master 'off-switch' for table capacity updates, e.g. when troubleshooting
	ThroughputUpdatesDisabled bool `yaml:"throughput_updates_disabled"`

	// Master 'on-switch' for table retention deletions
	RetentionDeletesEnabled bool `yaml:"retention_deletes_enabled"`

	// How far back tables will be kept before they are deleted
	RetentionPeriod time.Duration `yaml:"-"`
	// This is so that we can accept 1w, 1y in the YAML.
	RetentionPeriodModel model.Duration `yaml:"retention_period"`

	// Period with which the table manager will poll for tables.
	PollInterval time.Duration `yaml:"poll_interval"`

	// duration a table will be created before it is needed.
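	// With the default of 10 minutes (see RegisterFlags below), next period's
	// tables are created up to ten minutes before they are first needed.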
	CreationGracePeriod time.Duration `yaml:"creation_grace_period"`

	IndexTables config.ProvisionConfig `yaml:"index_tables_provisioning"`
	ChunkTables config.ProvisionConfig `yaml:"chunk_tables_provisioning"`
}

// UnmarshalYAML implements the yaml.Unmarshaler interface. To support RetentionPeriod.
func (cfg *TableManagerConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
	// If we call unmarshal on TableManagerConfig, it will call UnmarshalYAML leading to infinite recursion.
	// To make unmarshal fill the plain data struct rather than calling UnmarshalYAML
	// again, we have to hide it using a type indirection.
	type plain TableManagerConfig
	if err := unmarshal((*plain)(cfg)); err != nil {
		return err
	}

	if cfg.RetentionPeriodModel > 0 {
		cfg.RetentionPeriod = time.Duration(cfg.RetentionPeriodModel)
	}

	return nil
}

// MarshalYAML implements the yaml.Marshaler interface. To support RetentionPeriod.
func (cfg *TableManagerConfig) MarshalYAML() (interface{}, error) {
	cfg.RetentionPeriodModel = model.Duration(cfg.RetentionPeriod)
	return cfg, nil
}

// Validate validates the config.
func (cfg *TableManagerConfig) Validate() error {
	// We're setting this field because when using flags, you set the RetentionPeriodModel but not RetentionPeriod.
	// TODO(gouthamve): It's a hack, but I can't think of any other way :/
	if cfg.RetentionPeriodModel > 0 {
		cfg.RetentionPeriod = time.Duration(cfg.RetentionPeriodModel)
	}

	return nil
}

// RegisterFlags adds the flags required to configure this to the given FlagSet.
func (cfg *TableManagerConfig) RegisterFlags(f *flag.FlagSet) {
	f.BoolVar(&cfg.ThroughputUpdatesDisabled, "table-manager.throughput-updates-disabled", false, "If true, disable all changes to DB capacity")
	f.BoolVar(&cfg.RetentionDeletesEnabled, "table-manager.retention-deletes-enabled", false, "If true, enables retention deletes of DB tables")
	f.Var(&cfg.RetentionPeriodModel, "table-manager.retention-period", "Tables older than this retention period are deleted. Must be either 0 (disabled) or a multiple of 24h. When enabled, be aware this setting is destructive to data!")
	f.DurationVar(&cfg.PollInterval, "table-manager.poll-interval", 2*time.Minute, "How frequently to poll backend to learn our capacity.")
	f.DurationVar(&cfg.CreationGracePeriod, "table-manager.periodic-table.grace-period", 10*time.Minute, "Periodic tables grace period (duration which table will be created/deleted before/after it's needed).")

	cfg.IndexTables.RegisterFlags("table-manager.index-table", f)
	cfg.ChunkTables.RegisterFlags("table-manager.chunk-table", f)
}

// BucketClient is used to enforce retention on chunk buckets.
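// A minimal sketch of an implementation (hypothetical, for illustration only;
// the fsBucketClient type and its dir field are not part of this package) is:
//
//	type fsBucketClient struct{ dir string }
//
//	func (c fsBucketClient) DeleteChunksBefore(ctx context.Context, ts time.Time) error {
//		// walk c.dir and remove chunk files whose modification time is before ts
//		return nil
//	}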
type BucketClient interface {
	DeleteChunksBefore(ctx context.Context, ts time.Time) error
}

// TableManager creates and manages the provisioned throughput on DynamoDB tables
type TableManager struct {
	services.Service

	client       TableClient
	cfg          TableManagerConfig
	schemaCfg    config.SchemaConfig
	maxChunkAge  time.Duration
	bucketClient BucketClient
	metrics      *tableManagerMetrics
	extraTables  []ExtraTables

	bucketRetentionLoop services.Service
}

// NewTableManager makes a new TableManager
func NewTableManager(cfg TableManagerConfig, schemaCfg config.SchemaConfig, maxChunkAge time.Duration, tableClient TableClient,
	objectClient BucketClient, extraTables []ExtraTables, registerer prometheus.Registerer,
) (*TableManager, error) {
	if cfg.RetentionPeriod != 0 {
		// Assume the newest config is the one to use for validation of retention
		indexTablesPeriod := schemaCfg.Configs[len(schemaCfg.Configs)-1].IndexTables.Period
		if indexTablesPeriod != 0 && cfg.RetentionPeriod%indexTablesPeriod != 0 {
			return nil, errors.New("retention period should now be a multiple of periodic table duration")
		}
	}

	tm := &TableManager{
		cfg:          cfg,
		schemaCfg:    schemaCfg,
		maxChunkAge:  maxChunkAge,
		client:       tableClient,
		bucketClient: objectClient,
		metrics:      newTableManagerMetrics(registerer),
		extraTables:  extraTables,
	}

	tm.Service = services.NewBasicService(tm.starting, tm.loop, tm.stopping)
	return tm, nil
}

// Start the TableManager
func (m *TableManager) starting(ctx context.Context) error {
	if m.bucketClient != nil && m.cfg.RetentionPeriod != 0 && m.cfg.RetentionDeletesEnabled {
		m.bucketRetentionLoop = services.NewTimerService(bucketRetentionEnforcementInterval, nil, m.bucketRetentionIteration, nil)
		return services.StartAndAwaitRunning(ctx, m.bucketRetentionLoop)
	}
	return nil
}

// Stop the TableManager
func (m *TableManager) stopping(_ error) error {
	if m.bucketRetentionLoop != nil {
		return services.StopAndAwaitTerminated(context.Background(), m.bucketRetentionLoop)
	}
	m.client.Stop()
	return nil
}

func (m *TableManager) loop(ctx context.Context) error {
	ticker := time.NewTicker(m.cfg.PollInterval)
	defer ticker.Stop()

	if err := instrument.CollectedRequest(context.Background(), "TableManager.SyncTables", instrument.NewHistogramCollector(m.metrics.syncTableDuration), instrument.ErrorCode, func(ctx context.Context) error {
		return m.SyncTables(ctx)
	}); err != nil {
		level.Error(util_log.Logger).Log("msg", "error syncing tables", "err", err)
	}

	// Sleep for a bit to spread the sync load across different times if the table managers are all started at once.
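	// The delay below is a uniformly random duration in [0, PollInterval), so with
	// the default 2m poll interval each instance waits up to two minutes.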
	select {
	case <-time.After(time.Duration(rand.Int63n(int64(m.cfg.PollInterval)))):
	case <-ctx.Done():
		return nil
	}

	for {
		select {
		case <-ticker.C:
			if err := instrument.CollectedRequest(context.Background(), "TableManager.SyncTables", instrument.NewHistogramCollector(m.metrics.syncTableDuration), instrument.ErrorCode, func(ctx context.Context) error {
				return m.SyncTables(ctx)
			}); err != nil {
				level.Error(util_log.Logger).Log("msg", "error syncing tables", "err", err)
			}
		case <-ctx.Done():
			return nil
		}
	}
}

func (m *TableManager) checkAndCreateExtraTables() error {
	for _, extraTables := range m.extraTables {
		existingTablesList, err := extraTables.TableClient.ListTables(context.Background())
		if err != nil {
			return err
		}

		existingTablesMap := map[string]struct{}{}
		for _, table := range existingTablesList {
			existingTablesMap[table] = struct{}{}
		}

		for _, tableDesc := range extraTables.Tables {
			if _, ok := existingTablesMap[tableDesc.Name]; !ok {
				// creating table
				level.Info(util_log.Logger).Log("msg", "creating extra table",
					"tableName", tableDesc.Name,
					"provisionedRead", tableDesc.ProvisionedRead,
					"provisionedWrite", tableDesc.ProvisionedWrite,
					"useOnDemandMode", tableDesc.UseOnDemandIOMode,
					"useWriteAutoScale", tableDesc.WriteScale.Enabled,
					"useReadAutoScale", tableDesc.ReadScale.Enabled,
				)
				err = extraTables.TableClient.CreateTable(context.Background(), tableDesc)
				if err != nil {
					return err
				}
				continue
			} else if m.cfg.ThroughputUpdatesDisabled {
				// table already exists, throughput updates are disabled so no need to check for a difference in configured throughput vs actual
				continue
			}

			level.Info(util_log.Logger).Log("msg", "checking throughput of extra table", "table", tableDesc.Name)
			// table already exists, so check that its actual throughput matches the configuration and update it if not
			current, _, err := extraTables.TableClient.DescribeTable(context.Background(), tableDesc.Name)
			if err != nil {
				return err
			}

			if !current.Equals(tableDesc) {
				level.Info(util_log.Logger).Log("msg", "updating throughput of extra table",
					"table", tableDesc.Name,
					"tableName", tableDesc.Name,
					"provisionedRead", tableDesc.ProvisionedRead,
					"provisionedWrite", tableDesc.ProvisionedWrite,
					"useOnDemandMode", tableDesc.UseOnDemandIOMode,
					"useWriteAutoScale", tableDesc.WriteScale.Enabled,
					"useReadAutoScale", tableDesc.ReadScale.Enabled,
				)
				err := extraTables.TableClient.UpdateTable(context.Background(), current, tableDesc)
				if err != nil {
					return err
				}
			}
		}
	}

	return nil
}

// single iteration of bucket retention loop
func (m *TableManager) bucketRetentionIteration(ctx context.Context) error {
	err := m.bucketClient.DeleteChunksBefore(ctx, mtime.Now().Add(-m.cfg.RetentionPeriod))
	if err != nil {
		level.Error(util_log.Logger).Log("msg", "error enforcing filesystem retention", "err", err)
	}

	// don't return error, otherwise timer service would stop.
	return nil
}

// SyncTables will calculate the tables expected to exist, create those that do
// not and update those that need it. It is exposed for testing.
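// A direct call (for example from a test, assuming tm is a *TableManager built
// with NewTableManager) looks like:
//
//	if err := tm.SyncTables(context.Background()); err != nil {
//		// at least one table failed to be created, deleted or updated
//	}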
func (m *TableManager) SyncTables(ctx context.Context) error {
	err := m.checkAndCreateExtraTables()
	if err != nil {
		return err
	}

	expected := m.calculateExpectedTables()
	level.Debug(util_log.Logger).Log("msg", "synching tables", "expected_tables", len(expected))

	toCreate, toCheckThroughput, toDelete, err := m.partitionTables(ctx, expected)
	if err != nil {
		return err
	}

	if err := m.deleteTables(ctx, toDelete); err != nil {
		return err
	}

	if err := m.createTables(ctx, toCreate); err != nil {
		return err
	}

	if err := m.updateTables(ctx, toCheckThroughput); err != nil {
		return err
	}

	m.metrics.lastSuccessfulSync.SetToCurrentTime()
	return nil
}

func (m *TableManager) calculateExpectedTables() []config.TableDesc {
	result := []config.TableDesc{}

	for i, cfg := range m.schemaCfg.Configs {
		// Consider configs which we are about to hit and which require tables to be created due to the grace period
		if cfg.From.Time.Time().After(mtime.Now().Add(m.cfg.CreationGracePeriod)) {
			continue
		}
		if cfg.IndexTables.Period == 0 { // non-periodic table
			if len(result) > 0 && result[len(result)-1].Name == cfg.IndexTables.Prefix {
				continue // already got a non-periodic table with this name
			}

			table := config.TableDesc{
				Name:              cfg.IndexTables.Prefix,
				ProvisionedRead:   m.cfg.IndexTables.InactiveReadThroughput,
				ProvisionedWrite:  m.cfg.IndexTables.InactiveWriteThroughput,
				UseOnDemandIOMode: m.cfg.IndexTables.InactiveThroughputOnDemandMode,
				Tags:              cfg.IndexTables.Tags,
			}
			isActive := true
			if i+1 < len(m.schemaCfg.Configs) {
				var (
					endTime         = m.schemaCfg.Configs[i+1].From.Unix()
					gracePeriodSecs = int64(m.cfg.CreationGracePeriod / time.Second)
					maxChunkAgeSecs = int64(m.maxChunkAge / time.Second)
					now             = mtime.Now().Unix()
				)
				if now >= endTime+gracePeriodSecs+maxChunkAgeSecs {
					isActive = false
				}
			}
			if isActive {
				table.ProvisionedRead = m.cfg.IndexTables.ProvisionedReadThroughput
				table.ProvisionedWrite = m.cfg.IndexTables.ProvisionedWriteThroughput
				table.UseOnDemandIOMode = m.cfg.IndexTables.ProvisionedThroughputOnDemandMode
				if m.cfg.IndexTables.WriteScale.Enabled {
					table.WriteScale = m.cfg.IndexTables.WriteScale
					table.UseOnDemandIOMode = false
				}
				if m.cfg.IndexTables.ReadScale.Enabled {
					table.ReadScale = m.cfg.IndexTables.ReadScale
					table.UseOnDemandIOMode = false
				}
			}
			result = append(result, table)
		} else {
			endTime := mtime.Now().Add(m.cfg.CreationGracePeriod)
			if i+1 < len(m.schemaCfg.Configs) {
				nextFrom := m.schemaCfg.Configs[i+1].From.Time.Time()
				if endTime.After(nextFrom) {
					endTime = nextFrom
				}
			}
			endModelTime := model.TimeFromUnix(endTime.Unix())
			result = append(result, cfg.IndexTables.PeriodicTables(
				cfg.From.Time, endModelTime, m.cfg.IndexTables, m.cfg.CreationGracePeriod, m.maxChunkAge, m.cfg.RetentionPeriod,
			)...)
			if cfg.ChunkTables.Prefix != "" {
				result = append(result, cfg.ChunkTables.PeriodicTables(
					cfg.From.Time, endModelTime, m.cfg.ChunkTables, m.cfg.CreationGracePeriod, m.maxChunkAge, m.cfg.RetentionPeriod,
				)...)
			}
		}
	}

	sort.Sort(byName(result))
	return result
}

// partitionTables works out which tables need to be created, which need their throughput checked, and which should be deleted
func (m *TableManager) partitionTables(ctx context.Context, descriptions []config.TableDesc) ([]config.TableDesc, []config.TableDesc, []config.TableDesc, error) {
	tables, err := m.client.ListTables(ctx)
	if err != nil {
		return nil, nil, nil, err
	}

	existingTables := make(map[string]struct{}, len(tables))
	for _, table := range tables {
		existingTables[table] = struct{}{}
	}

	expectedTables := make(map[string]config.TableDesc, len(descriptions))
	for _, desc := range descriptions {
		expectedTables[desc.Name] = desc
	}

	toCreate, toCheck, toDelete := []config.TableDesc{}, []config.TableDesc{}, []config.TableDesc{}
	for _, expectedTable := range expectedTables {
		if _, ok := existingTables[expectedTable.Name]; ok {
			toCheck = append(toCheck, expectedTable)
		} else {
			toCreate = append(toCreate, expectedTable)
		}
	}

	if m.cfg.RetentionPeriod > 0 {
		// Ensure we only delete tables which have a prefix managed by Cortex.
		tablePrefixes := map[string]struct{}{}
		for _, cfg := range m.schemaCfg.Configs {
			if cfg.IndexTables.Prefix != "" {
				tablePrefixes[cfg.IndexTables.Prefix] = struct{}{}
			}
			if cfg.ChunkTables.Prefix != "" {
				tablePrefixes[cfg.ChunkTables.Prefix] = struct{}{}
			}
		}

		for existingTable := range existingTables {
			if _, ok := expectedTables[existingTable]; !ok {
				for tblPrefix := range tablePrefixes {
					if strings.HasPrefix(existingTable, tblPrefix) {
						toDelete = append(toDelete, config.TableDesc{Name: existingTable})
						break
					}
				}
			}
		}
	}

	return toCreate, toCheck, toDelete, nil
}

func (m *TableManager) createTables(ctx context.Context, descriptions []config.TableDesc) error {
	numFailures := 0
	merr := tsdb_errors.NewMulti()

	for _, desc := range descriptions {
		level.Debug(util_log.Logger).Log("msg", "creating table", "table", desc.Name)
		err := m.client.CreateTable(ctx, desc)
		if err != nil {
			numFailures++
			merr.Add(err)
		}
	}

	m.metrics.createFailures.Set(float64(numFailures))
	return merr.Err()
}

func (m *TableManager) deleteTables(ctx context.Context, descriptions []config.TableDesc) error {
	numFailures := 0
	merr := tsdb_errors.NewMulti()

	for _, desc := range descriptions {
		level.Info(util_log.Logger).Log("msg", "table has exceeded the retention period", "table", desc.Name)
		if !m.cfg.RetentionDeletesEnabled {
			continue
		}

		level.Info(util_log.Logger).Log("msg", "deleting table", "table", desc.Name)
		err := m.client.DeleteTable(ctx, desc.Name)
		if err != nil {
			numFailures++
			merr.Add(err)
		}
	}

	m.metrics.deleteFailures.Set(float64(numFailures))
	return merr.Err()
}

func (m *TableManager) updateTables(ctx context.Context, descriptions []config.TableDesc) error {
	for _, expected := range descriptions {
		level.Debug(util_log.Logger).Log("msg", "checking provisioned throughput on table", "table", expected.Name)
		current, isActive, err := m.client.DescribeTable(ctx, expected.Name)
		if err != nil {
			return err
		}

		m.metrics.tableCapacity.WithLabelValues(readLabel, expected.Name).Set(float64(current.ProvisionedRead))
		m.metrics.tableCapacity.WithLabelValues(writeLabel, expected.Name).Set(float64(current.ProvisionedWrite))

		if m.cfg.ThroughputUpdatesDisabled {
			continue
		}

		if !isActive {
			level.Info(util_log.Logger).Log("msg", "skipping update on table, not yet ACTIVE", "table", expected.Name)
			continue
		}

		if expected.Equals(current) {
			level.Info(util_log.Logger).Log("msg", "provisioned throughput on table, skipping", "table", current.Name, "read", current.ProvisionedRead, "write", current.ProvisionedWrite)
			continue
		}

		err = m.client.UpdateTable(ctx, current, expected)
		if err != nil {
			return err
		}
	}
	return nil
}

// ExpectTables compares existing tables to an expected set of tables. Exposed
// for testing.
func ExpectTables(ctx context.Context, client TableClient, expected []config.TableDesc) error {
	tables, err := client.ListTables(ctx)
	if err != nil {
		return err
	}

	if len(expected) != len(tables) {
		return fmt.Errorf("Unexpected number of tables: %v != %v", expected, tables)
	}

	sort.Strings(tables)
	sort.Sort(byName(expected))

	for i, expect := range expected {
		if tables[i] != expect.Name {
			return fmt.Errorf("Expected '%s', found '%s'", expect.Name, tables[i])
		}

		desc, _, err := client.DescribeTable(ctx, expect.Name)
		if err != nil {
			return err
		}

		if !desc.Equals(expect) {
			return fmt.Errorf("Expected '%#v', found '%#v' for table '%s'", expect, desc, desc.Name)
		}
	}

	return nil
}
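// Example use of ExpectTables from a test (hypothetical; the mockTableClient
// value and the *testing.T value t come from the caller, not from this file):
//
//	expected := []config.TableDesc{{Name: "index_2700"}}
//	if err := ExpectTables(context.Background(), mockTableClient, expected); err != nil {
//		t.Fatal(err)
//	}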