github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/stats/automatic_stats.go

// Copyright 2018 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package stats

import (
	"context"
	"fmt"
	"math"
	"math/rand"
	"time"

	"github.com/cockroachdb/cockroach/pkg/settings"
	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
	"github.com/cockroachdb/cockroach/pkg/sql/sqlutil"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/stop"
	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
	"github.com/cockroachdb/errors"
)

// AutoStatsClusterSettingName is the name of the automatic stats collection
// cluster setting.
const AutoStatsClusterSettingName = "sql.stats.automatic_collection.enabled"

// AutomaticStatisticsClusterMode controls the cluster setting for enabling
// automatic table statistics collection.
var AutomaticStatisticsClusterMode = settings.RegisterPublicBoolSetting(
	AutoStatsClusterSettingName,
	"automatic statistics collection mode",
	true,
)

// MultiColumnStatisticsClusterMode controls the cluster setting for enabling
// automatic collection of multi-column statistics.
var MultiColumnStatisticsClusterMode = settings.RegisterPublicBoolSetting(
	"sql.stats.multi_column_collection.enabled",
	"multi-column statistics collection mode",
	true,
)

// AutomaticStatisticsMaxIdleTime controls the maximum fraction of time that
// the sampler processors will be idle when scanning large tables for automatic
// statistics (in high load scenarios). This value can be tuned to trade off
// the runtime vs performance impact of automatic stats.
var AutomaticStatisticsMaxIdleTime = settings.RegisterValidatedFloatSetting(
	"sql.stats.automatic_collection.max_fraction_idle",
	"maximum fraction of time that automatic statistics sampler processors are idle",
	0.9,
	func(val float64) error {
		if val < 0 || val >= 1 {
			return pgerror.Newf(pgcode.InvalidParameterValue,
				"sql.stats.automatic_collection.max_fraction_idle must be >= 0 and < 1 but found: %v", val)
		}
		return nil
	},
)
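// Note (illustrative, not part of the original file): with the default value
// of 0.9, the sampler processors may sit idle for up to 90% of the time, so
// in the worst case an automatic stats scan takes roughly 1/(1-0.9) = 10x
// longer than an unthrottled scan, in exchange for a smaller impact on
// foreground queries.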
// AutomaticStatisticsFractionStaleRows controls the cluster setting for
// the target fraction of rows in a table that should be stale before
// statistics on that table are refreshed, in addition to the constant value
// AutomaticStatisticsMinStaleRows.
var AutomaticStatisticsFractionStaleRows = func() *settings.FloatSetting {
	s := settings.RegisterNonNegativeFloatSetting(
		"sql.stats.automatic_collection.fraction_stale_rows",
		"target fraction of stale rows per table that will trigger a statistics refresh",
		0.2,
	)
	s.SetVisibility(settings.Public)
	return s
}()

// AutomaticStatisticsMinStaleRows controls the cluster setting for the target
// number of rows that should be updated before a table is refreshed, in
// addition to the fraction AutomaticStatisticsFractionStaleRows.
var AutomaticStatisticsMinStaleRows = func() *settings.IntSetting {
	s := settings.RegisterNonNegativeIntSetting(
		"sql.stats.automatic_collection.min_stale_rows",
		"target minimum number of stale rows per table that will trigger a statistics refresh",
		500,
	)
	s.SetVisibility(settings.Public)
	return s
}()

// DefaultRefreshInterval is the frequency at which the Refresher will check if
// the stats for each table should be refreshed. It is mutable for testing.
// NB: Updates to this value after Refresher.Start has been called will not
// have any effect.
var DefaultRefreshInterval = time.Minute

// DefaultAsOfTime is a duration which is used to define the AS OF time for
// automatic runs of CREATE STATISTICS. It is mutable for testing.
// NB: Updates to this value after MakeRefresher has been called will not have
// any effect.
var DefaultAsOfTime = 30 * time.Second

// bufferedChanFullLogLimiter is used to minimize spamming the log with
// "buffered channel is full" errors.
var bufferedChanFullLogLimiter = log.Every(time.Second)

// Constants for automatic statistics collection.
// TODO(rytaft): Should these constants be configurable?
const (
	// AutoStatsName is the name to use for statistics created automatically.
	// The name is chosen to be something that users are unlikely to choose when
	// running CREATE STATISTICS manually.
	AutoStatsName = "__auto__"

	// defaultAverageTimeBetweenRefreshes is the default time to use as the
	// "average" time between refreshes when there is no information for a given
	// table.
	defaultAverageTimeBetweenRefreshes = 12 * time.Hour

	// refreshChanBufferLen is the length of the buffered channel used by the
	// automatic statistics refresher. If the channel overflows, all SQL mutations
	// will be ignored by the refresher until it processes some existing mutations
	// in the buffer and makes space for new ones. SQL mutations will never block
	// waiting on the refresher.
	refreshChanBufferLen = 256
)

// Refresher is responsible for automatically refreshing the table statistics
// that are used by the cost-based optimizer. It is necessary to periodically
// refresh the statistics to prevent them from becoming stale as data in the
// database changes.
//
// The Refresher is designed to schedule a CREATE STATISTICS refresh job after
// approximately X% of total rows have been updated/inserted/deleted in a given
// table. Currently, X is hardcoded to be 20%.
//
// The decision to refresh is based on a percentage rather than a fixed number
// of rows because if a table is huge and rarely updated, we don't want to
// waste time frequently refreshing stats. Likewise, if it's small and rapidly
// updated, we want to update stats more often.
//
// To avoid contention on row update counters, we use a statistical approach.
// For example, suppose we want to refresh stats after 20% of rows are updated
// and there are currently 1M rows in the table. If a user updates 10 rows,
// we use random number generation to refresh stats with probability
// 10/(1M * 0.2) = 0.00005. The general formula is:
//
//                           # rows updated/inserted/deleted
//   p = --------------------------------------------------------------------
//       (# rows in table) * (target fraction of rows updated before refresh)
//
// The existing statistics in the stats cache are used to get the number of
// rows in the table.
//
// In order to prevent small tables from being constantly refreshed, we also
// require that approximately 500 rows have changed in addition to the 20%.
//
// Refresher also implements some heuristic limits designed to corral
// statistical outliers. If we haven't refreshed stats in 2x the average time
// between the last few refreshes, we automatically trigger a refresh. The
// existing statistics in the stats cache are used to calculate the average
// time between refreshes as well as to determine when the stats were last
// updated.
//
// If the decision is made to continue with the refresh, Refresher runs
// CREATE STATISTICS on the given table with the default set of column
// statistics. See comments in sql/create_stats.go for details about which
// default columns are chosen. Refresher runs CREATE STATISTICS with
// AS OF SYSTEM TIME '-30s' to minimize performance impact on running
// transactions.
//
// To avoid adding latency to SQL mutation operations, the Refresher is run
// in one separate background thread per Server. SQL mutation operations signal
// to the Refresher thread by calling NotifyMutation, which sends mutation
// metadata to the Refresher thread over a non-blocking buffered channel. The
// signaling is best-effort; if the channel is full, the metadata will not be
// sent.
//
type Refresher struct {
	st      *cluster.Settings
	ex      sqlutil.InternalExecutor
	cache   *TableStatisticsCache
	randGen autoStatsRand

	// mutations is the buffered channel used to pass messages containing
	// metadata about SQL mutations to the background Refresher thread.
	mutations chan mutation

	// asOfTime is a duration which is used to define the AS OF time for
	// runs of CREATE STATISTICS by the Refresher.
	asOfTime time.Duration

	// extraTime is a small, random amount of extra time to add to the check for
	// whether too much time has passed since the last statistics refresh. It is
	// used to avoid having multiple nodes trying to create stats at the same
	// time.
	extraTime time.Duration

	// mutationCounts contains aggregated mutation counts for each table that
	// have yet to be processed by the refresher.
	mutationCounts map[sqlbase.ID]int64
}

// mutation contains metadata about a SQL mutation and is the message passed to
// the background refresher thread to (possibly) trigger a statistics refresh.
type mutation struct {
	tableID      sqlbase.ID
	rowsAffected int
}
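// exampleWouldRefresh is an illustrative sketch (not part of the original
// file) of the dice roll described in the comment on Refresher above. It
// combines the fraction-based target with the minimum-stale-rows floor in the
// same way maybeRefreshStats does below: with 1M rows and the default
// settings, a 10-row mutation triggers a refresh with probability
// 10/(1M*0.2 + 500), roughly 0.00005.
func exampleWouldRefresh(r *Refresher, rowCount float64, rowsAffected int64) bool {
	// Target number of stale rows before a refresh should happen.
	targetRows := int64(rowCount*AutomaticStatisticsFractionStaleRows.Get(&r.st.SV)) +
		AutomaticStatisticsMinStaleRows.Get(&r.st.SV)
	// Refresh with probability rowsAffected/targetRows (certain once
	// rowsAffected >= targetRows).
	return r.randGen.randInt(targetRows) < rowsAffected
}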
// MakeRefresher creates a new Refresher.
func MakeRefresher(
	st *cluster.Settings,
	ex sqlutil.InternalExecutor,
	cache *TableStatisticsCache,
	asOfTime time.Duration,
) *Refresher {
	randSource := rand.NewSource(rand.Int63())

	return &Refresher{
		st:             st,
		ex:             ex,
		cache:          cache,
		randGen:        makeAutoStatsRand(randSource),
		mutations:      make(chan mutation, refreshChanBufferLen),
		asOfTime:       asOfTime,
		extraTime:      time.Duration(rand.Int63n(int64(time.Hour))),
		mutationCounts: make(map[sqlbase.ID]int64, 16),
	}
}

// Start starts the stats refresher thread, which polls for messages about
// new SQL mutations and refreshes the table statistics with probability
// proportional to the percentage of rows affected.
func (r *Refresher) Start(
	ctx context.Context, stopper *stop.Stopper, refreshInterval time.Duration,
) error {
	stopper.RunWorker(context.Background(), func(ctx context.Context) {
		// We always sleep for r.asOfTime at the beginning of each refresh, so
		// subtract it from the refreshInterval.
		refreshInterval -= r.asOfTime
		if refreshInterval < 0 {
			refreshInterval = 0
		}

		timer := time.NewTimer(refreshInterval)
		defer timer.Stop()

		// Ensure that read-only tables will have stats created at least
		// once on startup.
		const initialTableCollectionDelay = time.Second
		initialTableCollection := time.After(initialTableCollectionDelay)

		for {
			select {
			case <-initialTableCollection:
				r.ensureAllTables(ctx, &r.st.SV, initialTableCollectionDelay)

			case <-timer.C:
				mutationCounts := r.mutationCounts
				if err := stopper.RunAsyncTask(
					ctx, "stats.Refresher: maybeRefreshStats", func(ctx context.Context) {
						// Wait so that the latest changes will be reflected according to the
						// AS OF time.
						timerAsOf := time.NewTimer(r.asOfTime)
						defer timerAsOf.Stop()
						select {
						case <-timerAsOf.C:
							break
						case <-stopper.ShouldQuiesce():
							return
						}

						for tableID, rowsAffected := range mutationCounts {
							// Check the cluster setting before each refresh in case it was
							// disabled recently.
							if !AutomaticStatisticsClusterMode.Get(&r.st.SV) {
								break
							}

							r.maybeRefreshStats(ctx, stopper, tableID, rowsAffected, r.asOfTime)

							select {
							case <-stopper.ShouldQuiesce():
								// Don't bother trying to refresh the remaining tables if we
								// are shutting down.
								return
							default:
							}
						}
						timer.Reset(refreshInterval)
					}); err != nil {
					log.Errorf(ctx, "failed to refresh stats: %v", err)
				}
				r.mutationCounts = make(map[sqlbase.ID]int64, len(r.mutationCounts))

			case mut := <-r.mutations:
				r.mutationCounts[mut.tableID] += int64(mut.rowsAffected)

			case <-stopper.ShouldStop():
				return
			}
		}
	})
	return nil
}
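// exampleStartRefresher is an illustrative sketch (not part of the original
// file) of how a server might wire up the Refresher at startup; the settings,
// executor, stats cache, and stopper are assumed to be supplied by the
// surrounding server initialization code.
func exampleStartRefresher(
	ctx context.Context,
	st *cluster.Settings,
	ex sqlutil.InternalExecutor,
	cache *TableStatisticsCache,
	stopper *stop.Stopper,
) error {
	refresher := MakeRefresher(st, ex, cache, DefaultAsOfTime)
	// Poll for buffered mutation messages once per DefaultRefreshInterval.
	return refresher.Start(ctx, stopper, DefaultRefreshInterval)
}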
// ensureAllTables ensures that an entry exists in r.mutationCounts for each
// table in the database.
func (r *Refresher) ensureAllTables(
	ctx context.Context, settings *settings.Values, initialTableCollectionDelay time.Duration,
) {
	if !AutomaticStatisticsClusterMode.Get(settings) {
		// Automatic stats are disabled.
		return
	}

	// Use a historical read so as to disable txn contention resolution.
	getAllTablesQuery := fmt.Sprintf(
		`
SELECT table_id FROM crdb_internal.tables AS OF SYSTEM TIME '-%s'
WHERE schema_name = 'public'
AND drop_time IS NULL
`,
		initialTableCollectionDelay)

	rows, err := r.ex.Query(
		ctx,
		"get-tables",
		nil, /* txn */
		getAllTablesQuery,
	)
	if err != nil {
		log.Errorf(ctx, "failed to get tables for automatic stats: %v", err)
		return
	}
	for _, row := range rows {
		tableID := sqlbase.ID(*row[0].(*tree.DInt))
		// Don't create statistics for system tables or virtual tables.
		// TODO(rytaft): Don't add views here either. Unfortunately views are not
		// identified differently from tables in crdb_internal.tables.
		if !sqlbase.IsReservedID(tableID) && !sqlbase.IsVirtualTable(tableID) {
			r.mutationCounts[tableID] += 0
		}
	}
}

// NotifyMutation is called by SQL mutation operations to signal to the
// Refresher that a table has been mutated. It should be called after any
// successful insert, update, upsert or delete. rowsAffected refers to the
// number of rows written as part of the mutation operation.
func (r *Refresher) NotifyMutation(tableID sqlbase.ID, rowsAffected int) {
	if !AutomaticStatisticsClusterMode.Get(&r.st.SV) {
		// Automatic stats are disabled.
		return
	}

	if sqlbase.IsReservedID(tableID) {
		// Don't try to create statistics for system tables (most importantly,
		// for table_statistics itself).
		return
	}
	if sqlbase.IsVirtualTable(tableID) {
		// Don't try to create statistics for virtual tables.
		return
	}

	// Send mutation info to the refresher thread to avoid adding latency to
	// the calling transaction.
	select {
	case r.mutations <- mutation{tableID: tableID, rowsAffected: rowsAffected}:
	default:
		// Don't block if there is no room in the buffered channel.
		if bufferedChanFullLogLimiter.ShouldLog() {
			log.Warningf(context.TODO(),
				"buffered channel is full. Unable to refresh stats for table %d with %d rows affected",
				tableID, rowsAffected)
		}
	}
}
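// Illustrative note (not part of the original file): NotifyMutation is
// invoked from the SQL execution layer after a successful mutation, roughly
//
//	refresher.NotifyMutation(tableDesc.ID, rowsWritten)
//
// where refresher, tableDesc, and rowsWritten are hypothetical names for the
// server's Refresher, the mutated table's descriptor, and the statement's
// affected-row count. The call never blocks: if the buffered channel is full,
// the notification is simply dropped.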
// maybeRefreshStats implements the core logic described in the comment for
// Refresher. It is called by the background Refresher thread.
func (r *Refresher) maybeRefreshStats(
	ctx context.Context,
	stopper *stop.Stopper,
	tableID sqlbase.ID,
	rowsAffected int64,
	asOf time.Duration,
) {
	tableStats, err := r.cache.GetTableStats(ctx, tableID)
	if err != nil {
		log.Errorf(ctx, "failed to get table statistics: %v", err)
		return
	}

	var rowCount float64
	mustRefresh := false
	if stat := mostRecentAutomaticStat(tableStats); stat != nil {
		// Check if too much time has passed since the last refresh.
		// This check is in place to corral statistical outliers and avoid a
		// case where a significant portion of the data in a table has changed but
		// the stats haven't been refreshed. Randomly add some extra time to the
		// limit check to avoid having multiple nodes trying to create stats at
		// the same time.
		//
		// Note that this can cause some unnecessary runs of CREATE STATISTICS
		// in the case where there is a heavy write load followed by a very light
		// load. For example, suppose the average refresh time is 1 hour during
		// the period of heavy writes, and the average refresh time should be 1
		// week during the period of light load. It could take ~16 refreshes over
		// 3-4 weeks before the average settles at around 1 week. (Assuming the
		// refresh happens at exactly 2x the current average, and the average
		// refresh time is calculated from the most recent 4 refreshes. See the
		// comment in stats/delete_stats.go.)
		maxTimeBetweenRefreshes := stat.CreatedAt.Add(2*avgRefreshTime(tableStats) + r.extraTime)
		if timeutil.Now().After(maxTimeBetweenRefreshes) {
			mustRefresh = true
		}
		rowCount = float64(stat.RowCount)
	} else {
		// If there are no statistics available on this table, we must perform a
		// refresh.
		mustRefresh = true
	}

	targetRows := int64(rowCount*AutomaticStatisticsFractionStaleRows.Get(&r.st.SV)) +
		AutomaticStatisticsMinStaleRows.Get(&r.st.SV)
	if !mustRefresh && rowsAffected < math.MaxInt32 && r.randGen.randInt(targetRows) >= rowsAffected {
		// No refresh is happening this time.
		return
	}

	if err := r.refreshStats(ctx, tableID, asOf); err != nil {
		if errors.Is(err, ConcurrentCreateStatsError) {
			// Another stats job was already running. Attempt to reschedule this
			// refresh.
			if mustRefresh {
				// For the cases where mustRefresh=true (stats don't yet exist or it
				// has been 2x the average time since a refresh), we want to make sure
				// that maybeRefreshStats is called on this table during the next
				// cycle so that we have another chance to trigger a refresh. We pass
				// rowsAffected=0 so that we don't force a refresh if another node has
				// already done it.
				r.mutations <- mutation{tableID: tableID, rowsAffected: 0}
			} else {
				// If this refresh was caused by a "dice roll", we want to make sure
				// that the refresh is rescheduled so that we adhere to the
				// AutomaticStatisticsFractionStaleRows statistical ideal. We
				// ensure that the refresh is triggered during the next cycle by
				// passing a very large number for rowsAffected.
				r.mutations <- mutation{tableID: tableID, rowsAffected: math.MaxInt32}
			}
			return
		}

		// Log other errors but don't automatically reschedule the refresh, since
		// that could lead to endless retries.
		log.Warningf(ctx, "failed to create statistics on table %d: %v", tableID, err)
		return
	}
}

func (r *Refresher) refreshStats(
	ctx context.Context, tableID sqlbase.ID, asOf time.Duration,
) error {
	// Create statistics for all default column sets on the given table.
	_ /* rows */, err := r.ex.Exec(
		ctx,
		"create-stats",
		nil, /* txn */
		fmt.Sprintf(
			"CREATE STATISTICS %s FROM [%d] WITH OPTIONS THROTTLING %g AS OF SYSTEM TIME '-%s'",
			AutoStatsName,
			tableID,
			AutomaticStatisticsMaxIdleTime.Get(&r.st.SV),
			asOf.String(),
		),
	)
	return err
}
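// Illustrative note (not part of the original file): with the default
// settings (asOf = 30s, max_fraction_idle = 0.9) and a hypothetical table ID
// of 53, the statement issued by refreshStats above renders as:
//
//	CREATE STATISTICS __auto__ FROM [53] WITH OPTIONS THROTTLING 0.9 AS OF SYSTEM TIME '-30s'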
// mostRecentAutomaticStat finds the most recent automatic statistic
// (identified by the name AutoStatsName).
func mostRecentAutomaticStat(tableStats []*TableStatistic) *TableStatistic {
	// Stats are sorted with the most recent first.
	for _, stat := range tableStats {
		if stat.Name == AutoStatsName {
			return stat
		}
	}
	return nil
}

// avgRefreshTime returns the average time between automatic statistics
// refreshes given a list of tableStats from one table. It does so by finding
// the most recent automatically generated statistic (identified by the name
// AutoStatsName), and then finding all previously generated automatic stats
// on those same columns. The average is calculated as the average time
// between each consecutive stat.
//
// If there are not at least two automatically generated statistics on the same
// columns, the default value defaultAverageTimeBetweenRefreshes is returned.
func avgRefreshTime(tableStats []*TableStatistic) time.Duration {
	var reference *TableStatistic
	var sum time.Duration
	var count int
	for _, stat := range tableStats {
		if stat.Name != AutoStatsName {
			continue
		}
		if reference == nil {
			reference = stat
			continue
		}
		if !areEqual(stat.ColumnIDs, reference.ColumnIDs) {
			continue
		}
		// Stats are sorted with the most recent first.
		sum += reference.CreatedAt.Sub(stat.CreatedAt)
		count++
		reference = stat
	}
	if count == 0 {
		return defaultAverageTimeBetweenRefreshes
	}
	return sum / time.Duration(count)
}

func areEqual(a, b []sqlbase.ColumnID) bool {
	if len(a) != len(b) {
		return false
	}
	for i := range a {
		if a[i] != b[i] {
			return false
		}
	}
	return true
}

// autoStatsRand pairs a rand.Rand with a mutex.
type autoStatsRand struct {
	*syncutil.Mutex
	*rand.Rand
}

func makeAutoStatsRand(source rand.Source) autoStatsRand {
	return autoStatsRand{
		Mutex: &syncutil.Mutex{},
		Rand:  rand.New(source),
	}
}

func (r autoStatsRand) randInt(n int64) int64 {
	r.Lock()
	defer r.Unlock()
	return r.Int63n(n)
}

type concurrentCreateStatisticsError struct{}

var _ error = concurrentCreateStatisticsError{}

func (concurrentCreateStatisticsError) Error() string {
	return "another CREATE STATISTICS job is already running"
}

// ConcurrentCreateStatsError is reported when two CREATE STATISTICS jobs
// are issued concurrently. This is a sentinel error.
var ConcurrentCreateStatsError error = concurrentCreateStatisticsError{}
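// Illustrative note (not part of the original file): as a worked example for
// avgRefreshTime above, suppose the cache holds automatic stats on the same
// column set created (most recent first) at 12:00, 10:00, 07:00, and 00:00.
// The consecutive gaps are 2h, 3h, and 7h, so the average refresh time is
// (2h+3h+7h)/3 = 4h, and maybeRefreshStats forces a refresh once more than
// 2*4h = 8h (plus the per-node extraTime jitter) has passed since the most
// recent automatic statistic.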