github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/causetstore/milevadb-server/statistics/handle/update.go

// Copyright 2020 WHTCORPS INC, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package handle

import (
	"bytes"
	"context"
	"fmt"
	"math"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/whtcorpsinc/BerolinaSQL/allegrosql"
	"github.com/whtcorpsinc/BerolinaSQL/perceptron"
	"github.com/whtcorpsinc/BerolinaSQL/terror"
	"github.com/whtcorpsinc/errors"
	"github.com/whtcorpsinc/log"
	"github.com/whtcorpsinc/milevadb/causetstore/einsteindb/oracle"
	"github.com/whtcorpsinc/milevadb/ekv"
	"github.com/whtcorpsinc/milevadb/metrics"
	"github.com/whtcorpsinc/milevadb/schemareplicant"
	"github.com/whtcorpsinc/milevadb/soliton/chunk"
	"github.com/whtcorpsinc/milevadb/soliton/codec"
	"github.com/whtcorpsinc/milevadb/soliton/logutil"
	"github.com/whtcorpsinc/milevadb/soliton/ranger"
	"github.com/whtcorpsinc/milevadb/soliton/sqlexec"
	"github.com/whtcorpsinc/milevadb/soliton/timeutil"
	"github.com/whtcorpsinc/milevadb/statistics"
	"github.com/whtcorpsinc/milevadb/stochastikctx/stmtctx"
	"github.com/whtcorpsinc/milevadb/stochastikctx/variable"
	"github.com/whtcorpsinc/milevadb/types"
	"go.uber.org/zap"
)

type blockDeltaMap map[int64]variable.TableDelta

func (m blockDeltaMap) uFIDelate(id int64, delta int64, count int64, colSize *map[int64]int64) {
	item := m[id]
	item.Delta += delta
	item.Count += count
	if item.DefCausSize == nil {
		item.DefCausSize = make(map[int64]int64)
	}
	if colSize != nil {
		for key, val := range *colSize {
			item.DefCausSize[key] += val
		}
	}
	m[id] = item
}

type errorRateDelta struct {
	PkID         int64
	PkErrorRate  *statistics.ErrorRate
	IdxErrorRate map[int64]*statistics.ErrorRate
}

type errorRateDeltaMap map[int64]errorRateDelta

func (m errorRateDeltaMap) uFIDelate(blockID int64, histID int64, rate float64, isIndex bool) {
	item := m[blockID]
	if isIndex {
		if item.IdxErrorRate == nil {
			item.IdxErrorRate = make(map[int64]*statistics.ErrorRate)
		}
		if item.IdxErrorRate[histID] == nil {
			item.IdxErrorRate[histID] = &statistics.ErrorRate{}
		}
		item.IdxErrorRate[histID].UFIDelate(rate)
	} else {
		if item.PkErrorRate == nil {
			item.PkID = histID
			item.PkErrorRate = &statistics.ErrorRate{}
		}
		item.PkErrorRate.UFIDelate(rate)
	}
	m[blockID] = item
}

func (m errorRateDeltaMap) merge(deltaMap errorRateDeltaMap) {
	for blockID, item := range deltaMap {
		tbl := m[blockID]
		for histID, errorRate := range item.IdxErrorRate {
			if tbl.IdxErrorRate == nil {
				tbl.IdxErrorRate = make(map[int64]*statistics.ErrorRate)
			}
			if tbl.IdxErrorRate[histID] == nil {
				tbl.IdxErrorRate[histID] = &statistics.ErrorRate{}
			}
			tbl.IdxErrorRate[histID].Merge(errorRate)
		}
		if item.PkErrorRate != nil {
			if tbl.PkErrorRate == nil {
				tbl.PkID = item.PkID
				tbl.PkErrorRate = &statistics.ErrorRate{}
			}
			tbl.PkErrorRate.Merge(item.PkErrorRate)
		}
		m[blockID] = tbl
	}
}

func (m errorRateDeltaMap) clear(blockID int64, histID int64, isIndex bool) {
	item := m[blockID]
	if isIndex {
		delete(item.IdxErrorRate, histID)
	} else {
		item.PkErrorRate = nil
	}
	m[blockID] = item
}

func (h *Handle) merge(s *StochastikStatsDefCauslector, rateMap errorRateDeltaMap) {
	for id, item := range s.mapper {
		h.globalMap.uFIDelate(id, item.Delta, item.Count, &item.DefCausSize)
	}
	s.mapper = make(blockDeltaMap)
	rateMap.merge(s.rateMap)
	s.rateMap = make(errorRateDeltaMap)
	h.feedback.Merge(s.feedback)
	s.feedback = statistics.NewQueryFeedbackMap()
}

// StochastikStatsDefCauslector is a list item that holds the delta mapper. If you want to write or read mapper, you must lock it first.
type StochastikStatsDefCauslector struct {
	sync.Mutex

	mapper   blockDeltaMap
	feedback *statistics.QueryFeedbackMap
	rateMap  errorRateDeltaMap
	next     *StochastikStatsDefCauslector
	// deleted is set to true when a session is closed. Every time we sweep the list, we remove the collectors that are no longer needed.
	deleted bool
}

// Delete only sets the deleted flag to true; the collector is removed from the list when DumpStatsDeltaToKV is called.
func (s *StochastikStatsDefCauslector) Delete() {
	s.Lock()
	defer s.Unlock()
	s.deleted = true
}

// UFIDelate updates the delta and count for one table ID.
func (s *StochastikStatsDefCauslector) UFIDelate(id int64, delta int64, count int64, colSize *map[int64]int64) {
	s.Lock()
	defer s.Unlock()
	s.mapper.uFIDelate(id, delta, count, colSize)
}

var (
	// MinLogScanCount is the minimum scan count for a feedback to be logged.
	MinLogScanCount = int64(1000)
	// MinLogErrorRate is the minimum error rate for a feedback to be logged.
	MinLogErrorRate = 0.5
)

// StoreQueryFeedback merges the feedback into the stats collector.
func (s *StochastikStatsDefCauslector) StoreQueryFeedback(feedback interface{}, h *Handle) error {
	q := feedback.(*statistics.QueryFeedback)
	if !q.Valid || q.Hist == nil {
		return nil
	}
	err := h.RecalculateExpectCount(q)
	if err != nil {
		return errors.Trace(err)
	}
	rate := q.CalcErrorRate()
	if !(rate >= MinLogErrorRate && (q.Actual() >= MinLogScanCount || q.Expected >= MinLogScanCount)) {
		return nil
	}
	metrics.SignificantFeedbackCounter.Inc()
	metrics.StatsInaccuracyRate.Observe(rate)
	if log.GetLevel() == zap.DebugLevel {
		h.logDetailedInfo(q)
	}
	s.Lock()
	defer s.Unlock()
	isIndex := q.Tp == statistics.IndexType
	s.rateMap.uFIDelate(q.PhysicalID, q.Hist.ID, rate, isIndex)
	s.feedback.Append(q)
	return nil
}

// NewStochastikStatsDefCauslector allocates a stats collector for a session.
func (h *Handle) NewStochastikStatsDefCauslector() *StochastikStatsDefCauslector {
	h.listHead.Lock()
	defer h.listHead.Unlock()
	newDefCauslector := &StochastikStatsDefCauslector{
		mapper:   make(blockDeltaMap),
		rateMap:  make(errorRateDeltaMap),
		next:     h.listHead.next,
		feedback: statistics.NewQueryFeedbackMap(),
	}
	h.listHead.next = newDefCauslector
	return newDefCauslector
}

var (
	// DumpStatsDeltaRatio is the lower bound of `Modify Count / Table Count` for a stats delta to be dumped.
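	// For example (an illustrative figure, derived from needDumpStatsDelta below): with the
	// default ratio of 1/10000, a table whose cached stats count is 1,000,000 rows has its
	// delta dumped once more than 100 rows have been modified, or in any case once
	// dumpStatsMaxDuration (one hour) has passed since the delta was first recorded.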
	DumpStatsDeltaRatio = 1 / 10000.0
	// dumpStatsMaxDuration is the max duration since the last update.
	dumpStatsMaxDuration = time.Hour
)

// needDumpStatsDelta returns true when the delta should be dumped: either a large enough portion
// of the table has been modified, or the delta has been held for longer than dumpStatsMaxDuration.
func needDumpStatsDelta(h *Handle, id int64, item variable.TableDelta, currentTime time.Time) bool {
	if item.InitTime.IsZero() {
		item.InitTime = currentTime
	}
	tbl, ok := h.statsCache.Load().(statsCache).blocks[id]
	if !ok {
		// No need to dump if the stats is invalid.
		return false
	}
	if currentTime.Sub(item.InitTime) > dumpStatsMaxDuration {
		// Dump the stats to kv at least once an hour.
		return true
	}
	if tbl.Count == 0 || float64(item.Count)/float64(tbl.Count) > DumpStatsDeltaRatio {
		// Dump the stats when there are many modifications.
		return true
	}
	return false
}

type dumpMode bool

const (
	// DumpAll indicates dumping all the delta info into kv.
	DumpAll dumpMode = true
	// DumFIDelelta indicates dumping only part of the delta info into kv.
	DumFIDelelta dumpMode = false
)

// sweepList loops over the list, merges each session's local stats into the handle,
// and removes the collectors of closed sessions.
func (h *Handle) sweepList() {
	prev := h.listHead
	prev.Lock()
	errorRateMap := make(errorRateDeltaMap)
	for curr := prev.next; curr != nil; curr = curr.next {
		curr.Lock()
		// Merge the session stats into the handle and the error rate map.
		h.merge(curr, errorRateMap)
		if curr.deleted {
			prev.next = curr.next
			// Since the session is already closed, we can safely unlock it here.
			curr.Unlock()
		} else {
			// Unlock the previous collector, so we hold at most two sessions' locks at the same time.
			prev.Unlock()
			prev = curr
		}
	}
	prev.Unlock()
	h.mu.Lock()
	h.mu.rateMap.merge(errorRateMap)
	h.mu.Unlock()
	h.siftFeedbacks()
}

// siftFeedbacks eliminates feedbacks that overlap with others. It is a tradeoff between
// feedback accuracy and its overhead.
func (h *Handle) siftFeedbacks() {
	sc := &stmtctx.StatementContext{TimeZone: time.UTC}
	for k, qs := range h.feedback.Feedbacks {
		fbs := make([]statistics.Feedback, 0, len(qs)*2)
		for _, q := range qs {
			fbs = append(fbs, q.Feedback...)
		}
		if len(fbs) == 0 {
			delete(h.feedback.Feedbacks, k)
			continue
		}
		h.feedback.Feedbacks[k] = h.feedback.Feedbacks[k][:1]
		h.feedback.Feedbacks[k][0].Feedback, _ = statistics.NonOverlappedFeedbacks(sc, fbs)
	}
	h.feedback.Size = len(h.feedback.Feedbacks)
}

// DumpStatsDeltaToKV sweeps the whole list and updates the global map, then dumps every table held in the map to KV.
// If the mode is `DumFIDelelta`, it only dumps the deltas whose `Modify Count / Table Count` exceeds the ratio.
func (h *Handle) DumpStatsDeltaToKV(mode dumpMode) error {
	h.sweepList()
	currentTime := time.Now()
	for id, item := range h.globalMap {
		if mode == DumFIDelelta && !needDumpStatsDelta(h, id, item, currentTime) {
			continue
		}
		uFIDelated, err := h.dumpTableStatCountToKV(id, item)
		if err != nil {
			return errors.Trace(err)
		}
		if uFIDelated {
			h.globalMap.uFIDelate(id, -item.Delta, -item.Count, nil)
		}
		if err = h.dumpTableStatDefCausSizeToKV(id, item); err != nil {
			return errors.Trace(err)
		}
		if uFIDelated {
			delete(h.globalMap, id)
		} else {
			m := h.globalMap[id]
			m.DefCausSize = nil
			h.globalMap[id] = m
		}
	}
	return nil
}

// dumpTableStatCountToKV dumps the count delta of a single table to KV and updates the version.
func (h *Handle) dumpTableStatCountToKV(id int64, delta variable.TableDelta) (uFIDelated bool, err error) {
	if delta.Count == 0 {
		return true, nil
	}
	h.mu.Lock()
	defer h.mu.Unlock()
	ctx := context.TODO()
	exec := h.mu.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate)
	_, err = exec.InterDircute(ctx, "begin")
	if err != nil {
		return false, errors.Trace(err)
	}
	defer func() {
		err = finishTransaction(context.Background(), exec, err)
	}()

	txn, err := h.mu.ctx.Txn(true)
	if err != nil {
		return false, errors.Trace(err)
	}
	startTS := txn.StartTS()
	var allegrosql string
	if delta.Delta < 0 {
		allegrosql = fmt.Sprintf("uFIDelate allegrosql.stats_spacetime set version = %d, count = count - %d, modify_count = modify_count + %d where block_id = %d and count >= %d", startTS, -delta.Delta, delta.Count, id, -delta.Delta)
	} else {
		allegrosql = fmt.Sprintf("uFIDelate allegrosql.stats_spacetime set version = %d, count = count + %d, modify_count = modify_count + %d where block_id = %d", startTS, delta.Delta, delta.Count, id)
	}
	err = execALLEGROSQLs(context.Background(), exec, []string{allegrosql})
	uFIDelated = h.mu.ctx.GetStochastikVars().StmtCtx.AffectedRows() > 0
	return
}

func (h *Handle) dumpTableStatDefCausSizeToKV(id int64, delta variable.TableDelta) error {
	if len(delta.DefCausSize) == 0 {
		return nil
	}
	values := make([]string, 0, len(delta.DefCausSize))
	for histID, deltaDefCausSize := range delta.DefCausSize {
		if deltaDefCausSize == 0 {
			continue
		}
		values = append(values, fmt.Sprintf("(%d, 0, %d, 0, %d)", id, histID, deltaDefCausSize))
	}
	if len(values) == 0 {
		return nil
	}
	allegrosql := fmt.Sprintf("insert into allegrosql.stats_histograms (block_id, is_index, hist_id, distinct_count, tot_col_size) "+
		"values %s on duplicate key uFIDelate tot_col_size = tot_col_size + values(tot_col_size)", strings.Join(values, ","))
	_, _, err := h.restrictedInterDirc.InterDircRestrictedALLEGROSQL(allegrosql)
	return errors.Trace(err)
}

// DumpStatsFeedbackToKV dumps the stats feedback to KV.
func (h *Handle) DumpStatsFeedbackToKV() error {
	var err error
	for _, fbs := range h.feedback.Feedbacks {
		for _, fb := range fbs {
			if fb.Tp == statistics.PkType {
				err = h.DumpFeedbackToKV(fb)
			} else {
				t, ok := h.statsCache.Load().(statsCache).blocks[fb.PhysicalID]
				if ok {
					err = h.DumpFeedbackForIndex(fb, t)
				}
			}
			if err != nil {
				// For simplicity, we just drop other feedbacks in case of error.
				break
			}
		}
	}
	h.feedback = statistics.NewQueryFeedbackMap()
	return errors.Trace(err)
}

// DumpFeedbackToKV dumps the given feedback to the physical kv layer.
func (h *Handle) DumpFeedbackToKV(fb *statistics.QueryFeedback) error {
	vals, err := statistics.EncodeFeedback(fb)
	if err != nil {
		logutil.BgLogger().Debug("error occurred when encoding feedback", zap.Error(err))
		return nil
	}
	var isIndex int64
	if fb.Tp == statistics.IndexType {
		isIndex = 1
	}
	allegrosql := fmt.Sprintf("insert into allegrosql.stats_feedback (block_id, hist_id, is_index, feedback) values "+
		"(%d, %d, %d, X'%X')", fb.PhysicalID, fb.Hist.ID, isIndex, vals)
	h.mu.Lock()
	_, err = h.mu.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate).InterDircute(context.TODO(), allegrosql)
	h.mu.Unlock()
	if err != nil {
		metrics.DumpFeedbackCounter.WithLabelValues(metrics.LblError).Inc()
	} else {
		metrics.DumpFeedbackCounter.WithLabelValues(metrics.LblOK).Inc()
	}
	return errors.Trace(err)
}

// UFIDelateStatsByLocalFeedback updates statistics using the local feedback.
// Currently, we dump the feedback with a period of 10 minutes, which means
// it takes 10 minutes for a feedback to take effect. However, we can use the
// feedback locally on this milevadb-server, so it can be used in a more timely manner.
func (h *Handle) UFIDelateStatsByLocalFeedback(is schemareplicant.SchemaReplicant) {
	h.sweepList()
	for _, fbs := range h.feedback.Feedbacks {
		for _, fb := range fbs {
			h.mu.Lock()
			causet, ok := h.getTableByPhysicalID(is, fb.PhysicalID)
			h.mu.Unlock()
			if !ok {
				continue
			}
			tblStats := h.GetPartitionStats(causet.Meta(), fb.PhysicalID)
			newTblStats := tblStats.Copy()
			if fb.Tp == statistics.IndexType {
				idx, ok := tblStats.Indices[fb.Hist.ID]
				if !ok || idx.Histogram.Len() == 0 {
					continue
				}
				newIdx := *idx
				eqFB, ranFB := statistics.SplitFeedbackByQueryType(fb.Feedback)
				newIdx.CMSketch = statistics.UFIDelateCMSketch(idx.CMSketch, eqFB)
				newIdx.Histogram = *statistics.UFIDelateHistogram(&idx.Histogram, &statistics.QueryFeedback{Feedback: ranFB})
				newIdx.Histogram.PreCalculateScalar()
				newIdx.Flag = statistics.ResetAnalyzeFlag(newIdx.Flag)
				newTblStats.Indices[fb.Hist.ID] = &newIdx
			} else {
				col, ok := tblStats.DeferredCausets[fb.Hist.ID]
				if !ok || col.Histogram.Len() == 0 {
					continue
				}
				newDefCaus := *col
				// Only use the range queries to update the primary key stats.
				_, ranFB := statistics.SplitFeedbackByQueryType(fb.Feedback)
				newFB := &statistics.QueryFeedback{Feedback: ranFB}
				newFB = newFB.DecodeIntValues()
				newDefCaus.Histogram = *statistics.UFIDelateHistogram(&col.Histogram, newFB)
				newDefCaus.Flag = statistics.ResetAnalyzeFlag(newDefCaus.Flag)
				newTblStats.DeferredCausets[fb.Hist.ID] = &newDefCaus
			}
			oldCache := h.statsCache.Load().(statsCache)
			h.uFIDelateStatsCache(oldCache.uFIDelate([]*statistics.Block{newTblStats}, nil, oldCache.version))
		}
	}
}

// UFIDelateErrorRate updates the error rate of columns from h.rateMap into the cache.
func (h *Handle) UFIDelateErrorRate(is schemareplicant.SchemaReplicant) {
	h.mu.Lock()
	tbls := make([]*statistics.Block, 0, len(h.mu.rateMap))
	for id, item := range h.mu.rateMap {
		causet, ok := h.getTableByPhysicalID(is, id)
		if !ok {
			continue
		}
		tbl := h.GetPartitionStats(causet.Meta(), id).Copy()
		if item.PkErrorRate != nil && tbl.DeferredCausets[item.PkID] != nil {
			col := *tbl.DeferredCausets[item.PkID]
			col.ErrorRate.Merge(item.PkErrorRate)
			tbl.DeferredCausets[item.PkID] = &col
		}
		for key, val := range item.IdxErrorRate {
			if tbl.Indices[key] == nil {
				continue
			}
			idx := *tbl.Indices[key]
			idx.ErrorRate.Merge(val)
			tbl.Indices[key] = &idx
		}
		tbls = append(tbls, tbl)
		delete(h.mu.rateMap, id)
	}
	h.mu.Unlock()
	oldCache := h.statsCache.Load().(statsCache)
	h.uFIDelateStatsCache(oldCache.uFIDelate(tbls, nil, oldCache.version))
}

// HandleUFIDelateStats updates the stats using feedback.
func (h *Handle) HandleUFIDelateStats(is schemareplicant.SchemaReplicant) error {
	allegrosql := "SELECT distinct block_id from allegrosql.stats_feedback"
	blocks, _, err := h.restrictedInterDirc.InterDircRestrictedALLEGROSQL(allegrosql)
	if err != nil {
		return errors.Trace(err)
	}
	if len(blocks) == 0 {
		return nil
	}

	for _, ptbl := range blocks {
		// This closure lets `defer` work normally, so that `Close()` is called before any return.
		err = func() error {
			tbl := ptbl.GetInt64(0)
			allegrosql = fmt.Sprintf("select block_id, hist_id, is_index, feedback from allegrosql.stats_feedback where block_id=%d order by hist_id, is_index", tbl)
			rc, err := h.mu.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate).InterDircute(context.TODO(), allegrosql)
			if len(rc) > 0 {
				defer terror.Call(rc[0].Close)
			}
			if err != nil {
				return errors.Trace(err)
			}
			blockID, histID, isIndex := int64(-1), int64(-1), int64(-1)
			var rows []chunk.Row
			for {
				req := rc[0].NewChunk()
				iter := chunk.NewIterator4Chunk(req)
				err := rc[0].Next(context.TODO(), req)
				if err != nil {
					return errors.Trace(err)
				}
				if req.NumRows() == 0 {
					if len(rows) > 0 {
						if err := h.handleSingleHistogramUFIDelate(is, rows); err != nil {
							return errors.Trace(err)
						}
					}
					break
				}
				for event := iter.Begin(); event != iter.End(); event = iter.Next() {
					// `len(rows) > 100000` limits the number of buffered rows to avoid OOM.
					if event.GetInt64(0) != blockID || event.GetInt64(1) != histID || event.GetInt64(2) != isIndex || len(rows) > 100000 {
						if len(rows) > 0 {
							if err := h.handleSingleHistogramUFIDelate(is, rows); err != nil {
								return errors.Trace(err)
							}
						}
						blockID, histID, isIndex = event.GetInt64(0), event.GetInt64(1), event.GetInt64(2)
						rows = rows[:0]
					}
					rows = append(rows, event)
				}
			}
			return nil
		}()
		if err != nil {
			return err
		}
	}
	return nil
}

// handleSingleHistogramUFIDelate updates the Histogram and CM Sketch using these feedbacks. All the feedbacks for
// the same index or column are gathered in `rows`.
func (h *Handle) handleSingleHistogramUFIDelate(is schemareplicant.SchemaReplicant, rows []chunk.Row) (err error) {
	physicalTableID, histID, isIndex := rows[0].GetInt64(0), rows[0].GetInt64(1), rows[0].GetInt64(2)
	defer func() {
		if err == nil {
			err = errors.Trace(h.deleteOutdatedFeedback(physicalTableID, histID, isIndex))
		}
	}()
	h.mu.Lock()
	causet, ok := h.getTableByPhysicalID(is, physicalTableID)
	h.mu.Unlock()
	// The table has been deleted.
	if !ok {
		return nil
	}
	var tbl *statistics.Block
	if causet.Meta().GetPartitionInfo() != nil {
		tbl = h.GetPartitionStats(causet.Meta(), physicalTableID)
	} else {
		tbl = h.GetTableStats(causet.Meta())
	}
	var cms *statistics.CMSketch
	var hist *statistics.Histogram
	if isIndex == 1 {
		idx, ok := tbl.Indices[histID]
		if ok && idx.Histogram.Len() > 0 {
			idxHist := idx.Histogram
			hist = &idxHist
			cms = idx.CMSketch.Copy()
		}
	} else {
		col, ok := tbl.DeferredCausets[histID]
		if ok && col.Histogram.Len() > 0 {
			colHist := col.Histogram
			hist = &colHist
		}
	}
	// The column or index has been deleted.
	if hist == nil {
		return nil
	}
	q := &statistics.QueryFeedback{}
	for _, event := range rows {
		err1 := statistics.DecodeFeedback(event.GetBytes(3), q, cms, hist.Tp)
		if err1 != nil {
			logutil.BgLogger().Debug("decode feedback failed", zap.Error(err1))
		}
	}
	err = h.dumpStatsUFIDelateToKV(physicalTableID, isIndex, q, hist, cms)
	return errors.Trace(err)
}

func (h *Handle) deleteOutdatedFeedback(blockID, histID, isIndex int64) error {
	h.mu.Lock()
	defer h.mu.Unlock()
	hasData := true
	for hasData {
		allegrosql := fmt.Sprintf("delete from allegrosql.stats_feedback where block_id = %d and hist_id = %d and is_index = %d limit 10000", blockID, histID, isIndex)
		_, err := h.mu.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate).InterDircute(context.TODO(), allegrosql)
		if err != nil {
			return errors.Trace(err)
		}
		hasData = h.mu.ctx.GetStochastikVars().StmtCtx.AffectedRows() > 0
	}
	return nil
}

func (h *Handle) dumpStatsUFIDelateToKV(blockID, isIndex int64, q *statistics.QueryFeedback, hist *statistics.Histogram, cms *statistics.CMSketch) error {
	hist = statistics.UFIDelateHistogram(hist, q)
	err := h.SaveStatsToStorage(blockID, -1, int(isIndex), hist, cms, 0)
	metrics.UFIDelateStatsCounter.WithLabelValues(metrics.RetLabel(err)).Inc()
	return errors.Trace(err)
}

const (
	// StatsTenantKey is the stats tenant path that is saved to etcd.
	StatsTenantKey = "/milevadb/stats/tenant"
	// StatsPrompt is the prompt for the stats tenant manager.
	StatsPrompt = "stats"
)

// AutoAnalyzeMinCnt means that if the row count of a table is less than this value, we don't need to do auto analyze.
var AutoAnalyzeMinCnt int64 = 1000

// TableAnalyzed checks if the table has been analyzed.
func TableAnalyzed(tbl *statistics.Block) bool {
	for _, col := range tbl.DeferredCausets {
		if col.Count > 0 {
			return true
		}
	}
	for _, idx := range tbl.Indices {
		if idx.Histogram.Len() > 0 {
			return true
		}
	}
	return false
}

// NeedAnalyzeTable checks if we need to analyze the table:
// 1. If the table has never been analyzed, we need to analyze it when it has
//    not been modified for a while.
// 2. If the table had been analyzed before, we need to analyze it when
//    "tbl.ModifyCount/tbl.Count > autoAnalyzeRatio" and the current time is
//    between `start` and `end`.
func NeedAnalyzeTable(tbl *statistics.Block, limit time.Duration, autoAnalyzeRatio float64, start, end, now time.Time) (bool, string) {
	analyzed := TableAnalyzed(tbl)
	if !analyzed {
		t := time.Unix(0, oracle.ExtractPhysical(tbl.Version)*int64(time.Millisecond))
		dur := time.Since(t)
		return dur >= limit, fmt.Sprintf("causet unanalyzed, time since last uFIDelated %vs", dur)
	}
	// Auto analyze is disabled.
	if autoAnalyzeRatio == 0 {
		return false, ""
	}
	// No need to analyze it.
	if float64(tbl.ModifyCount)/float64(tbl.Count) <= autoAnalyzeRatio {
		return false, ""
	}
	// Test if the current time is within the time period.
	return timeutil.WithinDayTimePeriod(start, end, now), fmt.Sprintf("too many modifications(%v/%v>%v)", tbl.ModifyCount, tbl.Count, autoAnalyzeRatio)
}

func (h *Handle) getAutoAnalyzeParameters() map[string]string {
	allegrosql := fmt.Sprintf("select variable_name, variable_value from allegrosql.global_variables where variable_name in ('%s', '%s', '%s')",
		variable.MilevaDBAutoAnalyzeRatio, variable.MilevaDBAutoAnalyzeStartTime, variable.MilevaDBAutoAnalyzeEndTime)
	rows, _, err := h.restrictedInterDirc.InterDircRestrictedALLEGROSQL(allegrosql)
	if err != nil {
		return map[string]string{}
	}
	parameters := make(map[string]string, len(rows))
	for _, event := range rows {
		parameters[event.GetString(0)] = event.GetString(1)
	}
	return parameters
}

func parseAutoAnalyzeRatio(ratio string) float64 {
	autoAnalyzeRatio, err := strconv.ParseFloat(ratio, 64)
	if err != nil {
		return variable.DefAutoAnalyzeRatio
	}
	return math.Max(autoAnalyzeRatio, 0)
}

func parseAnalyzePeriod(start, end string) (time.Time, time.Time, error) {
	if start == "" {
		start = variable.DefAutoAnalyzeStartTime
	}
	if end == "" {
		end = variable.DefAutoAnalyzeEndTime
	}
	s, err := time.ParseInLocation(variable.FullDayTimeFormat, start, time.UTC)
	if err != nil {
		return s, s, errors.Trace(err)
	}
	e, err := time.ParseInLocation(variable.FullDayTimeFormat, end, time.UTC)
	return s, e, err
}

// HandleAutoAnalyze analyzes the newly created table or index.
func (h *Handle) HandleAutoAnalyze(is schemareplicant.SchemaReplicant) {
	dbs := is.AllSchemaNames()
	parameters := h.getAutoAnalyzeParameters()
	autoAnalyzeRatio := parseAutoAnalyzeRatio(parameters[variable.MilevaDBAutoAnalyzeRatio])
	start, end, err := parseAnalyzePeriod(parameters[variable.MilevaDBAutoAnalyzeStartTime], parameters[variable.MilevaDBAutoAnalyzeEndTime])
	if err != nil {
		logutil.BgLogger().Error("[stats] parse auto analyze period failed", zap.Error(err))
		return
	}
	for _, EDB := range dbs {
		tbls := is.SchemaTables(perceptron.NewCIStr(EDB))
		for _, tbl := range tbls {
			tblInfo := tbl.Meta()
			pi := tblInfo.GetPartitionInfo()
			if pi == nil {
				statsTbl := h.GetTableStats(tblInfo)
				allegrosql := "analyze causet `" + EDB + "`.`" + tblInfo.Name.O + "`"
				analyzed := h.autoAnalyzeTable(tblInfo, statsTbl, start, end, autoAnalyzeRatio, allegrosql)
				if analyzed {
					return
				}
				continue
			}
			for _, def := range pi.Definitions {
				allegrosql := "analyze causet `" + EDB + "`.`" + tblInfo.Name.O + "`" + " partition `" + def.Name.O + "`"
				statsTbl := h.GetPartitionStats(tblInfo, def.ID)
				analyzed := h.autoAnalyzeTable(tblInfo, statsTbl, start, end, autoAnalyzeRatio, allegrosql)
				if analyzed {
					return
				}
				continue
			}
		}
	}
}

func (h *Handle) autoAnalyzeTable(tblInfo *perceptron.TableInfo, statsTbl *statistics.Block, start, end time.Time, ratio float64, allegrosql string) bool {
	if statsTbl.Pseudo || statsTbl.Count < AutoAnalyzeMinCnt {
		return false
	}
	if needAnalyze, reason := NeedAnalyzeTable(statsTbl, 20*h.Lease(), ratio, start, end, time.Now()); needAnalyze {
		logutil.BgLogger().Info("[stats] auto analyze triggered", zap.String("allegrosql", allegrosql), zap.String("reason", reason))
		h.execAutoAnalyze(allegrosql)
		return true
	}
	for _, idx := range tblInfo.Indices {
		if _, ok := statsTbl.Indices[idx.ID]; !ok && idx.State == perceptron.StatePublic {
			allegrosql = fmt.Sprintf("%s index `%s`", allegrosql, idx.Name.O)
			logutil.BgLogger().Info("[stats] auto analyze for unanalyzed", zap.String("allegrosql", allegrosql))
			h.execAutoAnalyze(allegrosql)
			return true
		}
	}
	return false
}

func (h *Handle) execAutoAnalyze(allegrosql string) {
	startTime := time.Now()
	_, _, err := h.restrictedInterDirc.InterDircRestrictedALLEGROSQL(allegrosql)
	dur := time.Since(startTime)
	metrics.AutoAnalyzeHistogram.Observe(dur.Seconds())
	if err != nil {
		logutil.BgLogger().Error("[stats] auto analyze failed", zap.String("allegrosql", allegrosql), zap.Duration("cost_time", dur), zap.Error(err))
		metrics.AutoAnalyzeCounter.WithLabelValues("failed").Inc()
	} else {
		metrics.AutoAnalyzeCounter.WithLabelValues("succ").Inc()
	}
}

// formatBuckets formats the buckets from lowBkt to highBkt.
func formatBuckets(hg *statistics.Histogram, lowBkt, highBkt, idxDefCauss int) string {
	if lowBkt == highBkt {
		return hg.BucketToString(lowBkt, idxDefCauss)
	}
	if lowBkt+1 == highBkt {
		return fmt.Sprintf("%s, %s", hg.BucketToString(lowBkt, idxDefCauss), hg.BucketToString(highBkt, idxDefCauss))
	}
	// We do not care about the middle buckets.
	return fmt.Sprintf("%s, (%d buckets, total count %d), %s", hg.BucketToString(lowBkt, idxDefCauss),
		highBkt-lowBkt-1, hg.Buckets[highBkt-1].Count-hg.Buckets[lowBkt].Count, hg.BucketToString(highBkt, idxDefCauss))
}

func colRangeToStr(c *statistics.DeferredCauset, ran *ranger.Range, actual int64, factor float64) string {
	lowCount, lowBkt := c.LessRowCountWithBktIdx(ran.LowVal[0])
	highCount, highBkt := c.LessRowCountWithBktIdx(ran.HighVal[0])
	return fmt.Sprintf("range: %s, actual: %d, expected: %d, buckets: {%s}", ran.String(), actual,
		int64((highCount-lowCount)*factor), formatBuckets(&c.Histogram, lowBkt, highBkt, 0))
}

func logForIndexRange(idx *statistics.Index, ran *ranger.Range, actual int64, factor float64) string {
	sc := &stmtctx.StatementContext{TimeZone: time.UTC}
	lb, err := codec.EncodeKey(sc, nil, ran.LowVal...)
	if err != nil {
		return ""
	}
	rb, err := codec.EncodeKey(sc, nil, ran.HighVal...)
	if err != nil {
		return ""
	}
	if idx.CMSketch != nil && bytes.Compare(ekv.Key(lb).PrefixNext(), rb) >= 0 {
		str, err := types.CausetsToString(ran.LowVal, true)
		if err != nil {
			return ""
		}
		return fmt.Sprintf("value: %s, actual: %d, expected: %d", str, actual, int64(float64(idx.QueryBytes(lb))*factor))
	}
	l, r := types.NewBytesCauset(lb), types.NewBytesCauset(rb)
	lowCount, lowBkt := idx.LessRowCountWithBktIdx(l)
	highCount, highBkt := idx.LessRowCountWithBktIdx(r)
	return fmt.Sprintf("range: %s, actual: %d, expected: %d, histogram: {%s}", ran.String(), actual,
		int64((highCount-lowCount)*factor), formatBuckets(&idx.Histogram, lowBkt, highBkt, len(idx.Info.DeferredCausets)))
}

func logForIndex(prefix string, t *statistics.Block, idx *statistics.Index, ranges []*ranger.Range, actual []int64, factor float64) {
	sc := &stmtctx.StatementContext{TimeZone: time.UTC}
	if idx.CMSketch == nil || idx.StatsVer != statistics.Version1 {
		for i, ran := range ranges {
			logutil.BgLogger().Debug(prefix, zap.String("index", idx.Info.Name.O), zap.String("rangeStr", logForIndexRange(idx, ran, actual[i], factor)))
		}
		return
	}
	for i, ran := range ranges {
		rangePosition := statistics.GetOrdinalOfRangeCond(sc, ran)
		// The range contains only an equality query or only a range query.
		if rangePosition == 0 || rangePosition == len(ran.LowVal) {
			logutil.BgLogger().Debug(prefix, zap.String("index", idx.Info.Name.O), zap.String("rangeStr", logForIndexRange(idx, ran, actual[i], factor)))
			continue
		}
		equalityString, err := types.CausetsToString(ran.LowVal[:rangePosition], true)
		if err != nil {
			continue
		}
		bytes, err := codec.EncodeKey(sc, nil, ran.LowVal[:rangePosition]...)
		if err != nil {
			continue
		}
		equalityCount := idx.CMSketch.QueryBytes(bytes)
		rang := ranger.Range{
			LowVal:  []types.Causet{ran.LowVal[rangePosition]},
			HighVal: []types.Causet{ran.HighVal[rangePosition]},
		}
		colName := idx.Info.DeferredCausets[rangePosition].Name.L
		// Prefer index stats over column stats.
		if idxHist := t.IndexStartWithDeferredCauset(colName); idxHist != nil && idxHist.Histogram.Len() > 0 {
			rangeString := logForIndexRange(idxHist, &rang, -1, factor)
			logutil.BgLogger().Debug(prefix, zap.String("index", idx.Info.Name.O), zap.Int64("actual", actual[i]),
				zap.String("equality", equalityString), zap.Uint64("expected equality", equalityCount),
				zap.String("range", rangeString))
		} else if colHist := t.DeferredCausetByName(colName); colHist != nil && colHist.Histogram.Len() > 0 {
			err = convertRangeType(&rang, colHist.Tp, time.UTC)
			if err == nil {
				rangeString := colRangeToStr(colHist, &rang, -1, factor)
				logutil.BgLogger().Debug(prefix, zap.String("index", idx.Info.Name.O), zap.Int64("actual", actual[i]),
					zap.String("equality", equalityString), zap.Uint64("expected equality", equalityCount),
					zap.String("range", rangeString))
			}
		} else {
			count, err := statistics.GetPseudoRowCountByDeferredCausetRanges(sc, float64(t.Count), []*ranger.Range{&rang}, 0)
			if err == nil {
				logutil.BgLogger().Debug(prefix, zap.String("index", idx.Info.Name.O), zap.Int64("actual", actual[i]),
					zap.String("equality", equalityString), zap.Uint64("expected equality", equalityCount),
					zap.Stringer("range", &rang), zap.Float64("pseudo count", math.Round(count)))
			}
		}
	}
}

func (h *Handle) logDetailedInfo(q *statistics.QueryFeedback) {
	t, ok := h.statsCache.Load().(statsCache).blocks[q.PhysicalID]
	if !ok {
		return
	}
	isIndex := q.Hist.IsIndexHist()
	ranges, err := q.DecodeToRanges(isIndex)
	if err != nil {
		logutil.BgLogger().Debug("decode to ranges failed", zap.Error(err))
		return
	}
	actual := make([]int64, 0, len(q.Feedback))
	for _, fb := range q.Feedback {
		actual = append(actual, fb.Count)
	}
	logPrefix := fmt.Sprintf("[stats-feedback] %s", t.Name)
	if isIndex {
		idx := t.Indices[q.Hist.ID]
		if idx == nil || idx.Histogram.Len() == 0 {
			return
		}
		logForIndex(logPrefix, t, idx, ranges, actual, idx.GetIncreaseFactor(t.Count))
	} else {
		c := t.DeferredCausets[q.Hist.ID]
		if c == nil || c.Histogram.Len() == 0 {
			return
		}
		logForPK(logPrefix, c, ranges, actual, c.GetIncreaseFactor(t.Count))
	}
}

func logForPK(prefix string, c *statistics.DeferredCauset, ranges []*ranger.Range, actual []int64, factor float64) {
	for i, ran := range ranges {
		if ran.LowVal[0].GetInt64()+1 >= ran.HighVal[0].GetInt64() {
			continue
		}
		logutil.BgLogger().Debug(prefix, zap.String("column", c.Info.Name.O), zap.String("rangeStr", colRangeToStr(c, ran, actual[i], factor)))
	}
}

// RecalculateExpectCount recalculates the expected row count if the original count was estimated using pseudo statistics.
func (h *Handle) RecalculateExpectCount(q *statistics.QueryFeedback) error {
	t, ok := h.statsCache.Load().(statsCache).blocks[q.PhysicalID]
	if !ok {
		return nil
	}
	blockPseudo := t.Pseudo || t.IsOutdated()
	if !blockPseudo {
		return nil
	}
	isIndex := q.Hist.Tp.Tp == allegrosql.TypeBlob
	id := q.Hist.ID
	if isIndex && (t.Indices[id] == nil || !t.Indices[id].NotAccurate()) {
		return nil
	}
	if !isIndex && (t.DeferredCausets[id] == nil || !t.DeferredCausets[id].NotAccurate()) {
		return nil
	}

	sc := &stmtctx.StatementContext{TimeZone: time.UTC}
	ranges, err := q.DecodeToRanges(isIndex)
	if err != nil {
		return errors.Trace(err)
	}
	expected := 0.0
	if isIndex {
		idx := t.Indices[id]
		expected, err = idx.GetRowCount(sc, ranges, t.ModifyCount)
		expected *= idx.GetIncreaseFactor(t.Count)
	} else {
		c := t.DeferredCausets[id]
		expected, err = c.GetDeferredCausetRowCount(sc, ranges, t.ModifyCount, true)
		expected *= c.GetIncreaseFactor(t.Count)
	}
	q.Expected = int64(expected)
	return err
}

func (h *Handle) dumpRangeFeedback(sc *stmtctx.StatementContext, ran *ranger.Range, rangeCount float64, q *statistics.QueryFeedback) error {
	lowIsNull := ran.LowVal[0].IsNull()
	if q.Tp == statistics.IndexType {
		lower, err := codec.EncodeKey(sc, nil, ran.LowVal[0])
		if err != nil {
			return errors.Trace(err)
		}
		upper, err := codec.EncodeKey(sc, nil, ran.HighVal[0])
		if err != nil {
			return errors.Trace(err)
		}
		ran.LowVal[0].SetBytes(lower)
		ran.HighVal[0].SetBytes(upper)
	} else {
		if !statistics.SupportDeferredCausetType(q.Hist.Tp) {
			return nil
		}
		if ran.LowVal[0].HoTT() == types.HoTTMinNotNull {
			ran.LowVal[0] = types.GetMinValue(q.Hist.Tp)
		}
		if ran.HighVal[0].HoTT() == types.HoTTMaxValue {
			ran.HighVal[0] = types.GetMaxValue(q.Hist.Tp)
		}
	}
	ranges, ok := q.Hist.SplitRange(sc, []*ranger.Range{ran}, q.Tp == statistics.IndexType)
	if !ok {
		logutil.BgLogger().Debug("type of histogram and ranges mismatch")
		return nil
	}
	counts := make([]float64, 0, len(ranges))
	sum := 0.0
	for i, r := range ranges {
		// Though after `SplitRange` we may have ranges like `[l, r]`, we still use
		// `BetweenRowCount` to compute the estimation, since the feedback ranges are all in `[l, r)`
		// form; that is to say, we ignore the exclusiveness of the ranges produced by `SplitRange`
		// and just use their boundary values.
		count := q.Hist.BetweenRowCount(r.LowVal[0], r.HighVal[0])
		// We have to include the histogram's `NullCount` for [l, r) cases where l is null, because
		// `BetweenRowCount` does not include null values of the lower bound.
		if i == 0 && lowIsNull {
			count += float64(q.Hist.NullCount)
		}
		sum += count
		counts = append(counts, count)
	}
	if sum <= 1 {
		return nil
	}
	// We assume that each part contributes the same error rate.
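	// For example (an illustrative figure, derived from the code below): if `SplitRange` produces
	// two sub-ranges with histogram estimates {30, 10} (sum = 40) and the feedback observed 80 rows
	// for the whole range, adjustFactor is 80/40 = 2 and the dumped per-range counts become {60, 20}.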
	adjustFactor := rangeCount / sum
	for i, r := range ranges {
		q.Feedback = append(q.Feedback, statistics.Feedback{Lower: &r.LowVal[0], Upper: &r.HighVal[0], Count: int64(counts[i] * adjustFactor)})
	}
	return errors.Trace(h.DumpFeedbackToKV(q))
}

func convertRangeType(ran *ranger.Range, ft *types.FieldType, loc *time.Location) error {
	err := statistics.ConvertCausetsType(ran.LowVal, ft, loc)
	if err != nil {
		return err
	}
	return statistics.ConvertCausetsType(ran.HighVal, ft, loc)
}

// DumpFeedbackForIndex dumps the feedback for an index.
// For queries that contain both equality and range conditions, we split them and update the stats accordingly.
func (h *Handle) DumpFeedbackForIndex(q *statistics.QueryFeedback, t *statistics.Block) error {
	idx, ok := t.Indices[q.Hist.ID]
	if !ok {
		return nil
	}
	sc := &stmtctx.StatementContext{TimeZone: time.UTC}
	if idx.CMSketch == nil || idx.StatsVer != statistics.Version1 {
		return h.DumpFeedbackToKV(q)
	}
	ranges, err := q.DecodeToRanges(true)
	if err != nil {
		logutil.BgLogger().Debug("decode feedback ranges fail", zap.Error(err))
		return nil
	}
	for i, ran := range ranges {
		rangePosition := statistics.GetOrdinalOfRangeCond(sc, ran)
		// The range contains only an equality query or only a range query.
		if rangePosition == 0 || rangePosition == len(ran.LowVal) {
			continue
		}

		bytes, err := codec.EncodeKey(sc, nil, ran.LowVal[:rangePosition]...)
		if err != nil {
			logutil.BgLogger().Debug("encode keys fail", zap.Error(err))
			continue
		}
		equalityCount := float64(idx.CMSketch.QueryBytes(bytes)) * idx.GetIncreaseFactor(t.Count)
		rang := &ranger.Range{
			LowVal:  []types.Causet{ran.LowVal[rangePosition]},
			HighVal: []types.Causet{ran.HighVal[rangePosition]},
		}
		colName := idx.Info.DeferredCausets[rangePosition].Name.L
		var rangeCount float64
		rangeFB := &statistics.QueryFeedback{PhysicalID: q.PhysicalID}
		// Prefer index stats over column stats.
		if idx := t.IndexStartWithDeferredCauset(colName); idx != nil && idx.Histogram.Len() != 0 {
			rangeCount, err = t.GetRowCountByIndexRanges(sc, idx.ID, []*ranger.Range{rang})
			rangeFB.Tp, rangeFB.Hist = statistics.IndexType, &idx.Histogram
		} else if col := t.DeferredCausetByName(colName); col != nil && col.Histogram.Len() != 0 {
			err = convertRangeType(rang, col.Tp, time.UTC)
			if err == nil {
				rangeCount, err = t.GetRowCountByDeferredCausetRanges(sc, col.ID, []*ranger.Range{rang})
				rangeFB.Tp, rangeFB.Hist = statistics.DefCausType, &col.Histogram
			}
		} else {
			continue
		}
		if err != nil {
			logutil.BgLogger().Debug("get event count by ranges fail", zap.Error(err))
			continue
		}

		equalityCount, rangeCount = getNewCountForIndex(equalityCount, rangeCount, float64(t.Count), float64(q.Feedback[i].Count))
		value := types.NewBytesCauset(bytes)
		q.Feedback[i] = statistics.Feedback{Lower: &value, Upper: &value, Count: int64(equalityCount)}
		err = h.dumpRangeFeedback(sc, rang, rangeCount, rangeFB)
		if err != nil {
			logutil.BgLogger().Debug("dump range feedback fail", zap.Error(err))
			continue
		}
	}
	return errors.Trace(h.DumpFeedbackToKV(q))
}

// minAdjustFactor is the minimum adjust factor of each index feedback.
// We use it to avoid adjusting too much when the independence assumption fails.
const minAdjustFactor = 0.7

// getNewCountForIndex adjusts the estimated `eqCount` and `rangeCount` according to the real count.
// We assume that `eqCount` and `rangeCount` contribute the same error rate.
func getNewCountForIndex(eqCount, rangeCount, totalCount, realCount float64) (float64, float64) {
	estimate := (eqCount / totalCount) * (rangeCount / totalCount) * totalCount
	if estimate <= 1 {
		return eqCount, rangeCount
	}
	adjustFactor := math.Sqrt(realCount / estimate)
	adjustFactor = math.Max(adjustFactor, minAdjustFactor)
	return eqCount * adjustFactor, rangeCount * adjustFactor
}
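// A worked example of getNewCountForIndex (illustrative only; the inputs are assumed, not taken
// from the code above): for eqCount = 100, rangeCount = 50, totalCount = 1000 and realCount = 20,
//
//	estimate     = (100/1000) * (50/1000) * 1000            = 5
//	adjustFactor = max(sqrt(20/5), minAdjustFactor)          = 2
//
// so the adjusted counts returned are (200, 100).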