github.com/whtcorpsinc/milevadb-prod@v0.0.0-20211104133533-f57f4be3b597/causetstore/milevadb-server/statistics/handle/handle.go

// Copyright 2020 WHTCORPS INC, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package handle

import (
	"context"
	"encoding/json"
	"fmt"
	"sync"
	"sync/atomic"
	"time"

	"github.com/cznic/mathutil"
	"github.com/whtcorpsinc/BerolinaSQL/allegrosql"
	"github.com/whtcorpsinc/BerolinaSQL/ast"
	"github.com/whtcorpsinc/BerolinaSQL/perceptron"
	"github.com/whtcorpsinc/BerolinaSQL/terror"
	"github.com/whtcorpsinc/errors"
	"github.com/whtcorpsinc/failpoint"
	"github.com/whtcorpsinc/milevadb/causet"
	"github.com/whtcorpsinc/milevadb/causetstore/einsteindb/oracle"
	"github.com/whtcorpsinc/milevadb/dbs/soliton"
	"github.com/whtcorpsinc/milevadb/ekv"
	"github.com/whtcorpsinc/milevadb/schemareplicant"
	"github.com/whtcorpsinc/milevadb/soliton/chunk"
	"github.com/whtcorpsinc/milevadb/soliton/logutil"
	"github.com/whtcorpsinc/milevadb/soliton/memory"
	"github.com/whtcorpsinc/milevadb/soliton/sqlexec"
	"github.com/whtcorpsinc/milevadb/statistics"
	"github.com/whtcorpsinc/milevadb/stochastikctx"
	"github.com/whtcorpsinc/milevadb/stochastikctx/stmtctx"
	"github.com/whtcorpsinc/milevadb/types"
	atomic2 "go.uber.org/atomic"
	"go.uber.org/zap"
)

// statsCache caches the blocks in memory for Handle.
type statsCache struct {
	blocks map[int64]*statistics.Block
	// version is the latest version of the cache.
	version  uint64
	memUsage int64
}

// Handle can uFIDelate stats info periodically.
type Handle struct {
	mu struct {
		sync.Mutex
		ctx stochastikctx.Context
		// rateMap contains the error rate delta from feedback.
		rateMap errorRateDeltaMap
		// pid2tid is the map from partition ID to causet ID.
		pid2tid map[int64]int64
		// schemaVersion is the version of the information schemaReplicant when `pid2tid` is built.
		schemaVersion int64
	}

	// statsCache can be read by multiple readers at the same time without acquiring the dagger,
	// but it can be written only after acquiring the dagger.
	statsCache struct {
		sync.Mutex
		atomic.Value
		memTracker *memory.Tracker
	}

	restrictedInterDirc sqlexec.RestrictedALLEGROSQLInterlockingDirectorate

	// dbsEventCh is a channel to notify that a dbs operation has happened.
	// It is sent to only by the tenant or the drop stats interlock, and read by the stats handle.
	dbsEventCh chan *soliton.Event
	// listHead contains all the stats collectors required by stochastik.
	listHead *StochastikStatsDefCauslector
	// globalMap contains all the delta maps from collectors when we dump them to KV.
	globalMap blockDeltaMap
	// feedback is used to causetstore query feedback info.
	feedback *statistics.QueryFeedbackMap

	lease atomic2.Duration
}
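// The statsCache field above pairs an atomic.Value with a mutex: readers call
// Load() without locking, while writers serialize on the embedded Mutex and
// publish a whole new statsCache value. A minimal sketch of the two paths
// (illustrative only; `h` is assumed to be a *Handle):
//
//	// lock-free read path
//	cache := h.statsCache.Load().(statsCache)
//	tbl := cache.blocks[physicalID]
//
//	// serialized write path (see uFIDelateStatsCache below)
//	h.statsCache.Lock()
//	h.statsCache.CausetStore(newCache)
//	h.statsCache.Unlock()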
// Clear clears the statsCache; it is only used in tests.
func (h *Handle) Clear() {
	h.mu.Lock()
	h.statsCache.Lock()
	h.statsCache.CausetStore(statsCache{blocks: make(map[int64]*statistics.Block)})
	h.statsCache.memTracker = memory.NewTracker(memory.LabelForStatsCache, -1)
	h.statsCache.Unlock()
	for len(h.dbsEventCh) > 0 {
		<-h.dbsEventCh
	}
	h.feedback = statistics.NewQueryFeedbackMap()
	h.mu.ctx.GetStochastikVars().InitChunkSize = 1
	h.mu.ctx.GetStochastikVars().MaxChunkSize = 1
	h.mu.ctx.GetStochastikVars().EnableChunkRPC = false
	h.mu.ctx.GetStochastikVars().SetProjectionConcurrency(0)
	h.listHead = &StochastikStatsDefCauslector{mapper: make(blockDeltaMap), rateMap: make(errorRateDeltaMap)}
	h.globalMap = make(blockDeltaMap)
	h.mu.rateMap = make(errorRateDeltaMap)
	h.mu.Unlock()
}

// NewHandle creates a Handle for uFIDelating stats.
func NewHandle(ctx stochastikctx.Context, lease time.Duration) *Handle {
	handle := &Handle{
		dbsEventCh: make(chan *soliton.Event, 100),
		listHead:   &StochastikStatsDefCauslector{mapper: make(blockDeltaMap), rateMap: make(errorRateDeltaMap)},
		globalMap:  make(blockDeltaMap),
		feedback:   statistics.NewQueryFeedbackMap(),
	}
	handle.lease.CausetStore(lease)
	// It is safe to use it concurrently because the exec won't touch the ctx.
	if exec, ok := ctx.(sqlexec.RestrictedALLEGROSQLInterlockingDirectorate); ok {
		handle.restrictedInterDirc = exec
	}
	handle.statsCache.memTracker = memory.NewTracker(memory.LabelForStatsCache, -1)
	handle.mu.ctx = ctx
	handle.mu.rateMap = make(errorRateDeltaMap)
	handle.statsCache.CausetStore(statsCache{blocks: make(map[int64]*statistics.Block)})
	return handle
}

// Lease returns the stats lease.
func (h *Handle) Lease() time.Duration {
	return h.lease.Load()
}

// SetLease sets the stats lease.
func (h *Handle) SetLease(lease time.Duration) {
	h.lease.CausetStore(lease)
}

// GetQueryFeedback gets the query feedback. It is only used in tests.
func (h *Handle) GetQueryFeedback() *statistics.QueryFeedbackMap {
	defer func() {
		h.feedback = statistics.NewQueryFeedbackMap()
	}()
	return h.feedback
}

// DurationToTS converts a duration to a timestamp.
func DurationToTS(d time.Duration) uint64 {
	return oracle.ComposeTS(d.Nanoseconds()/int64(time.Millisecond), 0)
}
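// DurationToTS relies on oracle.ComposeTS, which packs a physical millisecond
// component and a logical counter into a single uint64 timestamp. A rough
// illustration (a sketch only; the 18-bit logical suffix is an assumption about
// the oracle package's TS layout, not something defined in this file):
//
//	DurationToTS(3 * time.Second)   // ComposeTS(3000, 0)
//	                                // i.e. 3000 << 18 == 786432000
//
// UFIDelate below subtracts DurationToTS(3*h.Lease()) from the cached version to
// tolerate commits whose commit time lags their start version by up to three leases.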
// UFIDelate reads stats spacetime from the causetstore and uFIDelates the stats map.
func (h *Handle) UFIDelate(is schemareplicant.SchemaReplicant) error {
	oldCache := h.statsCache.Load().(statsCache)
	lastVersion := oldCache.version
	// We need this because, for two blocks, the one with the smaller version may be written later than the one with the larger version.
	// Consider two blocks A and B whose versions and commit times are (A0, A1) and (B0, B1),
	// with A0 < B0 < B1 < A1. We will first read the stats of B and uFIDelate lastVersion to B0, but then we cannot read
	// the causet stats of A0 if we only read stats whose version is greater than lastVersion (B0).
	// We can still read those stats as long as the diff between the commit time and the version is less than three leases.
	offset := DurationToTS(3 * h.Lease())
	if oldCache.version >= offset {
		lastVersion = lastVersion - offset
	} else {
		lastVersion = 0
	}
	allegrosql := fmt.Sprintf("SELECT version, block_id, modify_count, count from allegrosql.stats_spacetime where version > %d order by version", lastVersion)
	rows, _, err := h.restrictedInterDirc.InterDircRestrictedALLEGROSQL(allegrosql)
	if err != nil {
		return errors.Trace(err)
	}

	blocks := make([]*statistics.Block, 0, len(rows))
	deletedTableIDs := make([]int64, 0, len(rows))
	for _, event := range rows {
		version := event.GetUint64(0)
		physicalID := event.GetInt64(1)
		modifyCount := event.GetInt64(2)
		count := event.GetInt64(3)
		lastVersion = version
		h.mu.Lock()
		causet, ok := h.getTableByPhysicalID(is, physicalID)
		h.mu.Unlock()
		if !ok {
			logutil.BgLogger().Debug("unknown physical ID in stats spacetime causet, maybe it has been dropped", zap.Int64("ID", physicalID))
			deletedTableIDs = append(deletedTableIDs, physicalID)
			continue
		}
		blockInfo := causet.Meta()
		tbl, err := h.blockStatsFromStorage(blockInfo, physicalID, false, nil)
		// A non-nil error may mean that there are some dbs changes on this causet, so we will not uFIDelate it.
		if err != nil {
			logutil.BgLogger().Debug("error occurred when reading causet stats", zap.String("causet", blockInfo.Name.O), zap.Error(err))
			continue
		}
		if tbl == nil {
			deletedTableIDs = append(deletedTableIDs, physicalID)
			continue
		}
		tbl.Version = version
		tbl.Count = count
		tbl.ModifyCount = modifyCount
		tbl.Name = getFullTableName(is, blockInfo)
		blocks = append(blocks, tbl)
	}
	h.uFIDelateStatsCache(oldCache.uFIDelate(blocks, deletedTableIDs, lastVersion))
	return nil
}

func (h *Handle) getTableByPhysicalID(is schemareplicant.SchemaReplicant, physicalID int64) (causet.Block, bool) {
	if is.SchemaMetaVersion() != h.mu.schemaVersion {
		h.mu.schemaVersion = is.SchemaMetaVersion()
		h.mu.pid2tid = buildPartitionID2TableID(is)
	}
	if id, ok := h.mu.pid2tid[physicalID]; ok {
		return is.TableByID(id)
	}
	return is.TableByID(physicalID)
}

func buildPartitionID2TableID(is schemareplicant.SchemaReplicant) map[int64]int64 {
	mapper := make(map[int64]int64)
	for _, EDB := range is.AllSchemas() {
		tbls := EDB.Tables
		for _, tbl := range tbls {
			pi := tbl.GetPartitionInfo()
			if pi == nil {
				continue
			}
			for _, def := range pi.Definitions {
				mapper[def.ID] = tbl.ID
			}
		}
	}
	return mapper
}

// GetMemConsumed returns the memory size consumed by the stats cache.
func (h *Handle) GetMemConsumed() (size int64) {
	size = h.statsCache.memTracker.BytesConsumed()
	return
}

// GetAllTableStatsMemUsage gets the memory usage of all the causet stats.
// It is only used in tests.
func (h *Handle) GetAllTableStatsMemUsage() int64 {
	data := h.statsCache.Value.Load().(statsCache)
	cache := data.copy()
	allUsage := int64(0)
	for _, t := range cache.blocks {
		allUsage += t.MemoryUsage()
	}
	return allUsage
}

// GetTableStats retrieves the statistics causet from the cache; the cache is uFIDelated by a background goroutine.
func (h *Handle) GetTableStats(tblInfo *perceptron.TableInfo) *statistics.Block {
	return h.GetPartitionStats(tblInfo, tblInfo.ID)
}
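// getTableByPhysicalID treats a physical ID as either a partition ID or a causet ID:
// partition IDs resolve to their parent causet through the pid2tid map, which is
// rebuilt whenever the schemaReplicant version changes. A hypothetical layout, for
// a partitioned causet t (ID 100) with partitions p0 (ID 101) and p1 (ID 102):
//
//	mapper := buildPartitionID2TableID(is)
//	// mapper[101] == 100, mapper[102] == 100
//	// Looking up 101 or 102 returns causet 100; looking up 100 misses the map
//	// and falls through to is.TableByID(100) directly.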
// GetPartitionStats retrieves the partition stats from the cache.
func (h *Handle) GetPartitionStats(tblInfo *perceptron.TableInfo, pid int64) *statistics.Block {
	statsCache := h.statsCache.Load().(statsCache)
	tbl, ok := statsCache.blocks[pid]
	if !ok {
		tbl = statistics.PseudoTable(tblInfo)
		tbl.PhysicalID = pid
		h.uFIDelateStatsCache(statsCache.uFIDelate([]*statistics.Block{tbl}, nil, statsCache.version))
		return tbl
	}
	return tbl
}

// CanRuntimePrune indicates whether the causet supports runtime pruning, given its causet ID and first partition ID.
func (h *Handle) CanRuntimePrune(tid, p0Id int64) bool {
	if h == nil {
		return false
	}
	if tid == p0Id {
		return false
	}
	statsCache := h.statsCache.Load().(statsCache)
	_, tblExists := statsCache.blocks[tid]
	if tblExists {
		return true
	}
	_, partExists := statsCache.blocks[p0Id]
	return !partExists
}

func (h *Handle) uFIDelateStatsCache(newCache statsCache) {
	h.statsCache.Lock()
	oldCache := h.statsCache.Load().(statsCache)
	if oldCache.version <= newCache.version {
		h.statsCache.memTracker.Consume(newCache.memUsage - oldCache.memUsage)
		h.statsCache.CausetStore(newCache)
	}
	h.statsCache.Unlock()
}

func (sc statsCache) copy() statsCache {
	newCache := statsCache{
		blocks:   make(map[int64]*statistics.Block, len(sc.blocks)),
		version:  sc.version,
		memUsage: sc.memUsage,
	}
	for k, v := range sc.blocks {
		newCache.blocks[k] = v
	}
	return newCache
}

// initMemoryUsage calculates the total memory usage of the statsCache and sets statsCache.memUsage.
// It should be called after the blocks and their stats are initialized.
// Note: a pointer receiver is required here; with a value receiver the
// assignment to memUsage would only mutate a copy and be lost.
func (sc *statsCache) initMemoryUsage() {
	sum := int64(0)
	for _, tb := range sc.blocks {
		sum += tb.MemoryUsage()
	}
	sc.memUsage = sum
}

// uFIDelate uFIDelates the statistics causet cache using copy-on-write.
func (sc statsCache) uFIDelate(blocks []*statistics.Block, deletedIDs []int64, newVersion uint64) statsCache {
	newCache := sc.copy()
	newCache.version = newVersion
	for _, tbl := range blocks {
		id := tbl.PhysicalID
		if ptbl, ok := newCache.blocks[id]; ok {
			newCache.memUsage -= ptbl.MemoryUsage()
		}
		newCache.blocks[id] = tbl
		newCache.memUsage += tbl.MemoryUsage()
	}
	for _, id := range deletedIDs {
		if ptbl, ok := newCache.blocks[id]; ok {
			newCache.memUsage -= ptbl.MemoryUsage()
		}
		delete(newCache.blocks, id)
	}
	return newCache
}
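// The copy-on-write scheme above means readers never observe a half-uFIDelated
// cache: uFIDelate() clones the map, applies the changes, and the result is
// published atomically by uFIDelateStatsCache. A minimal sketch of the flow
// (illustrative only; oldCache is a statsCache value loaded by the caller):
//
//	newCache := oldCache.uFIDelate([]*statistics.Block{tbl}, nil, newVersion)
//	h.uFIDelateStatsCache(newCache) // no-op if a newer version won the race
//
// Because uFIDelateStatsCache only installs newCache when oldCache.version <=
// newCache.version, concurrent writers cannot roll the cache backwards.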
// LoadNeededHistograms will load histograms for those needed columns.
func (h *Handle) LoadNeededHistograms() (err error) {
	defcaus := statistics.HistogramNeededDeferredCausets.AllDefCauss()
	reader, err := h.getStatsReader(nil)
	if err != nil {
		return err
	}

	defer func() {
		err1 := h.releaseStatsReader(reader)
		if err1 != nil && err == nil {
			err = err1
		}
	}()

	for _, col := range defcaus {
		statsCache := h.statsCache.Load().(statsCache)
		tbl, ok := statsCache.blocks[col.TableID]
		if !ok {
			continue
		}
		tbl = tbl.Copy()
		c, ok := tbl.DeferredCausets[col.DeferredCausetID]
		if !ok || c.Len() > 0 {
			statistics.HistogramNeededDeferredCausets.Delete(col)
			continue
		}
		hg, err := h.histogramFromStorage(reader, col.TableID, c.ID, &c.Info.FieldType, c.NDV, 0, c.LastUFIDelateVersion, c.NullCount, c.TotDefCausSize, c.Correlation)
		if err != nil {
			return errors.Trace(err)
		}
		cms, err := h.cmSketchFromStorage(reader, col.TableID, 0, col.DeferredCausetID)
		if err != nil {
			return errors.Trace(err)
		}
		tbl.DeferredCausets[c.ID] = &statistics.DeferredCauset{
			PhysicalID: col.TableID,
			Histogram:  *hg,
			Info:       c.Info,
			CMSketch:   cms,
			Count:      int64(hg.TotalRowCount()),
			IsHandle:   c.IsHandle,
		}
		h.uFIDelateStatsCache(statsCache.uFIDelate([]*statistics.Block{tbl}, nil, statsCache.version))
		statistics.HistogramNeededDeferredCausets.Delete(col)
	}
	return nil
}

// LastUFIDelateVersion gets the last uFIDelate version.
func (h *Handle) LastUFIDelateVersion() uint64 {
	return h.statsCache.Load().(statsCache).version
}

// SetLastUFIDelateVersion sets the last uFIDelate version.
func (h *Handle) SetLastUFIDelateVersion(version uint64) {
	statsCache := h.statsCache.Load().(statsCache)
	h.uFIDelateStatsCache(statsCache.uFIDelate(nil, nil, version))
}
// FlushStats flushes the cached stats uFIDelates into the causetstore.
func (h *Handle) FlushStats() {
	for len(h.dbsEventCh) > 0 {
		e := <-h.dbsEventCh
		if err := h.HandleDBSEvent(e); err != nil {
			logutil.BgLogger().Debug("[stats] handle dbs event fail", zap.Error(err))
		}
	}
	if err := h.DumpStatsDeltaToKV(DumpAll); err != nil {
		logutil.BgLogger().Debug("[stats] dump stats delta fail", zap.Error(err))
	}
	if err := h.DumpStatsFeedbackToKV(); err != nil {
		logutil.BgLogger().Debug("[stats] dump stats feedback fail", zap.Error(err))
	}
}

func (h *Handle) cmSketchFromStorage(reader *statsReader, tblID int64, isIndex, histID int64) (_ *statistics.CMSketch, err error) {
	selALLEGROSQL := fmt.Sprintf("select cm_sketch from allegrosql.stats_histograms where block_id = %d and is_index = %d and hist_id = %d", tblID, isIndex, histID)
	rows, _, err := reader.read(selALLEGROSQL)
	if err != nil || len(rows) == 0 {
		return nil, err
	}
	selALLEGROSQL = fmt.Sprintf("select HIGH_PRIORITY value, count from allegrosql.stats_top_n where block_id = %d and is_index = %d and hist_id = %d", tblID, isIndex, histID)
	topNRows, _, err := reader.read(selALLEGROSQL)
	if err != nil {
		return nil, err
	}
	return statistics.DecodeCMSketch(rows[0].GetBytes(0), topNRows)
}
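// cmSketchFromStorage issues two reads in the same statsReader transaction: the
// serialized sketch from stats_histograms and its TopN rows from stats_top_n,
// which statistics.DecodeCMSketch merges back into one in-memory CMSketch. A
// hedged usage sketch (reader acquisition is shown with getStatsReader, defined
// later in this file; IDs are placeholders):
//
//	reader, _ := h.getStatsReader(nil)
//	cms, err := h.cmSketchFromStorage(reader, tblID, 0 /* is_index: column */, histID)
//	// cms is nil with a nil error when stats_histograms has no matching event.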
func (h *Handle) indexStatsFromStorage(reader *statsReader, event chunk.Row, causet *statistics.Block, blockInfo *perceptron.TableInfo) error {
	histID := event.GetInt64(2)
	distinct := event.GetInt64(3)
	histVer := event.GetUint64(4)
	nullCount := event.GetInt64(5)
	idx := causet.Indices[histID]
	errorRate := statistics.ErrorRate{}
	flag := event.GetInt64(8)
	lastAnalyzePos := event.GetCauset(10, types.NewFieldType(allegrosql.TypeBlob))
	if statistics.IsAnalyzed(flag) && !reader.isHistory() {
		h.mu.rateMap.clear(causet.PhysicalID, histID, true)
	} else if idx != nil {
		errorRate = idx.ErrorRate
	}
	for _, idxInfo := range blockInfo.Indices {
		if histID != idxInfo.ID {
			continue
		}
		if idx == nil || idx.LastUFIDelateVersion < histVer {
			hg, err := h.histogramFromStorage(reader, causet.PhysicalID, histID, types.NewFieldType(allegrosql.TypeBlob), distinct, 1, histVer, nullCount, 0, 0)
			if err != nil {
				return errors.Trace(err)
			}
			cms, err := h.cmSketchFromStorage(reader, causet.PhysicalID, 1, idxInfo.ID)
			if err != nil {
				return errors.Trace(err)
			}
			idx = &statistics.Index{Histogram: *hg, CMSketch: cms, Info: idxInfo, ErrorRate: errorRate, StatsVer: event.GetInt64(7), Flag: flag}
			lastAnalyzePos.Copy(&idx.LastAnalyzePos)
		}
		break
	}
	if idx != nil {
		causet.Indices[histID] = idx
	} else {
		logutil.BgLogger().Debug("we cannot find index id in causet info. It may be deleted.", zap.Int64("indexID", histID), zap.String("causet", blockInfo.Name.O))
	}
	return nil
}

func (h *Handle) columnStatsFromStorage(reader *statsReader, event chunk.Row, causet *statistics.Block, blockInfo *perceptron.TableInfo, loadAll bool) error {
	histID := event.GetInt64(2)
	distinct := event.GetInt64(3)
	histVer := event.GetUint64(4)
	nullCount := event.GetInt64(5)
	totDefCausSize := event.GetInt64(6)
	correlation := event.GetFloat64(9)
	lastAnalyzePos := event.GetCauset(10, types.NewFieldType(allegrosql.TypeBlob))
	col := causet.DeferredCausets[histID]
	errorRate := statistics.ErrorRate{}
	flag := event.GetInt64(8)
	if statistics.IsAnalyzed(flag) && !reader.isHistory() {
		h.mu.rateMap.clear(causet.PhysicalID, histID, false)
	} else if col != nil {
		errorRate = col.ErrorRate
	}
	for _, colInfo := range blockInfo.DeferredCausets {
		if histID != colInfo.ID {
			continue
		}
		isHandle := blockInfo.PKIsHandle && allegrosql.HasPriKeyFlag(colInfo.Flag)
		// We will not load the buckets if:
		// 1. the lease > 0, and:
		// 2. this column is not a handle, and:
		// 3. the column doesn't have buckets before, and:
		// 4. loadAll is false.
		notNeedLoad := h.Lease() > 0 &&
			!isHandle &&
			(col == nil || (col.Len() == 0 && col.LastUFIDelateVersion < histVer)) &&
			!loadAll
		if notNeedLoad {
			count, err := h.columnCountFromStorage(reader, causet.PhysicalID, histID)
			if err != nil {
				return errors.Trace(err)
			}
			col = &statistics.DeferredCauset{
				PhysicalID: causet.PhysicalID,
				Histogram:  *statistics.NewHistogram(histID, distinct, nullCount, histVer, &colInfo.FieldType, 0, totDefCausSize),
				Info:       colInfo,
				Count:      count + nullCount,
				ErrorRate:  errorRate,
				IsHandle:   isHandle,
				Flag:       flag,
			}
			lastAnalyzePos.Copy(&col.LastAnalyzePos)
			col.Histogram.Correlation = correlation
			break
		}
		if col == nil || col.LastUFIDelateVersion < histVer || loadAll {
			hg, err := h.histogramFromStorage(reader, causet.PhysicalID, histID, &colInfo.FieldType, distinct, 0, histVer, nullCount, totDefCausSize, correlation)
			if err != nil {
				return errors.Trace(err)
			}
			cms, err := h.cmSketchFromStorage(reader, causet.PhysicalID, 0, colInfo.ID)
			if err != nil {
				return errors.Trace(err)
			}
			col = &statistics.DeferredCauset{
				PhysicalID: causet.PhysicalID,
				Histogram:  *hg,
				Info:       colInfo,
				CMSketch:   cms,
				Count:      int64(hg.TotalRowCount()),
				ErrorRate:  errorRate,
				IsHandle:   isHandle,
				Flag:       flag,
			}
			lastAnalyzePos.Copy(&col.LastAnalyzePos)
			break
		}
		if col.TotDefCausSize != totDefCausSize {
			newDefCaus := *col
			newDefCaus.TotDefCausSize = totDefCausSize
			col = &newDefCaus
		}
		break
	}
	if col != nil {
		causet.DeferredCausets[col.ID] = col
	} else {
		// If we didn't find a DeferredCauset or Index in blockInfo, we won't load the histogram for it.
		// But don't worry: at the next lease the dbs info will be uFIDelated, and we will load the same causet
		// again to avoid the error.
		logutil.BgLogger().Debug("we cannot find column in causet info now. It may be deleted", zap.Int64("colID", histID), zap.String("causet", blockInfo.Name.O))
	}
	return nil
}
// blockStatsFromStorage loads causet stats info from storage.
func (h *Handle) blockStatsFromStorage(blockInfo *perceptron.TableInfo, physicalID int64, loadAll bool, historyStatsInterDirc sqlexec.RestrictedALLEGROSQLInterlockingDirectorate) (_ *statistics.Block, err error) {
	reader, err := h.getStatsReader(historyStatsInterDirc)
	if err != nil {
		return nil, err
	}
	defer func() {
		err1 := h.releaseStatsReader(reader)
		if err == nil && err1 != nil {
			err = err1
		}
	}()
	causet, ok := h.statsCache.Load().(statsCache).blocks[physicalID]
	// If the causet stats is pseudo, we also need to copy it, since we will use the column stats when
	// its average error rate is small.
	if !ok || historyStatsInterDirc != nil {
		histDefCausl := statistics.HistDefCausl{
			PhysicalID:      physicalID,
			HavePhysicalID:  true,
			DeferredCausets: make(map[int64]*statistics.DeferredCauset, len(blockInfo.DeferredCausets)),
			Indices:         make(map[int64]*statistics.Index, len(blockInfo.Indices)),
		}
		causet = &statistics.Block{
			HistDefCausl: histDefCausl,
		}
	} else {
		// We copy it before writing to avoid a race.
		causet = causet.Copy()
	}
	causet.Pseudo = false
	selALLEGROSQL := fmt.Sprintf("select block_id, is_index, hist_id, distinct_count, version, null_count, tot_col_size, stats_ver, flag, correlation, last_analyze_pos from allegrosql.stats_histograms where block_id = %d", physicalID)
	rows, _, err := reader.read(selALLEGROSQL)
	// Check whether the causet was deleted.
	if err != nil || len(rows) == 0 {
		return nil, nil
	}
	for _, event := range rows {
		if event.GetInt64(1) > 0 {
			err = h.indexStatsFromStorage(reader, event, causet, blockInfo)
		} else {
			err = h.columnStatsFromStorage(reader, event, causet, blockInfo, loadAll)
		}
		if err != nil {
			return nil, err
		}
	}
	return h.extendedStatsFromStorage(reader, causet, physicalID, loadAll)
}

func (h *Handle) extendedStatsFromStorage(reader *statsReader, causet *statistics.Block, physicalID int64, loadAll bool) (*statistics.Block, error) {
	lastVersion := uint64(0)
	if causet.ExtendedStats != nil && !loadAll {
		lastVersion = causet.ExtendedStats.LastUFIDelateVersion
	} else {
		causet.ExtendedStats = statistics.NewExtendedStatsDefCausl()
	}
	allegrosql := fmt.Sprintf("select stats_name, EDB, status, type, column_ids, scalar_stats, blob_stats, version from allegrosql.stats_extended where block_id = %d and status in (%d, %d) and version > %d", physicalID, StatsStatusAnalyzed, StatsStatusDeleted, lastVersion)
	rows, _, err := reader.read(allegrosql)
	if err != nil || len(rows) == 0 {
		return causet, nil
	}
	for _, event := range rows {
		lastVersion = mathutil.MaxUint64(lastVersion, event.GetUint64(7))
		key := statistics.ExtendedStatsKey{
			StatsName: event.GetString(0),
			EDB:       event.GetString(1),
		}
		status := uint8(event.GetInt64(2))
		if status == StatsStatusDeleted {
			delete(causet.ExtendedStats.Stats, key)
		} else {
			item := &statistics.ExtendedStatsItem{
				Tp:         uint8(event.GetInt64(3)),
				ScalarVals: event.GetFloat64(5),
				StringVals: event.GetString(6),
			}
			colIDs := event.GetString(4)
			err := json.Unmarshal([]byte(colIDs), &item.DefCausIDs)
			if err != nil {
				logutil.BgLogger().Debug("decode column IDs failed", zap.String("column_ids", colIDs), zap.Error(err))
				return nil, err
			}
			causet.ExtendedStats.Stats[key] = item
		}
	}
	causet.ExtendedStats.LastUFIDelateVersion = lastVersion
	return causet, nil
}
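// blockStatsFromStorage and extendedStatsFromStorage together rebuild one
// *statistics.Block from the system blocks (stats_histograms, stats_buckets,
// stats_top_n, stats_extended). A hedged sketch of the typical call, matching
// how UFIDelate above uses it (blockInfo is the causet's spacetime):
//
//	tbl, err := h.blockStatsFromStorage(blockInfo, physicalID, false /* loadAll */, nil)
//	// tbl == nil with a nil error means the causet's stats rows were deleted.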
// SaveStatsToStorage saves the stats to storage.
func (h *Handle) SaveStatsToStorage(blockID int64, count int64, isIndex int, hg *statistics.Histogram, cms *statistics.CMSketch, isAnalyzed int64) (err error) {
	h.mu.Lock()
	defer h.mu.Unlock()
	ctx := context.TODO()
	exec := h.mu.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate)
	_, err = exec.InterDircute(ctx, "begin")
	if err != nil {
		return errors.Trace(err)
	}
	defer func() {
		err = finishTransaction(context.Background(), exec, err)
	}()
	txn, err := h.mu.ctx.Txn(true)
	if err != nil {
		return errors.Trace(err)
	}

	version := txn.StartTS()
	sqls := make([]string, 0, 4)
	// If the count is less than 0, then we do not want to uFIDelate the modify count and count.
	if count >= 0 {
		sqls = append(sqls, fmt.Sprintf("replace into allegrosql.stats_spacetime (version, block_id, count) values (%d, %d, %d)", version, blockID, count))
	} else {
		sqls = append(sqls, fmt.Sprintf("uFIDelate allegrosql.stats_spacetime set version = %d where block_id = %d", version, blockID))
	}
	data, err := statistics.EncodeCMSketchWithoutTopN(cms)
	if err != nil {
		return
	}
	// Delete outdated data.
	sqls = append(sqls, fmt.Sprintf("delete from allegrosql.stats_top_n where block_id = %d and is_index = %d and hist_id = %d", blockID, isIndex, hg.ID))
	for _, spacetime := range cms.TopN() {
		sqls = append(sqls, fmt.Sprintf("insert into allegrosql.stats_top_n (block_id, is_index, hist_id, value, count) values (%d, %d, %d, X'%X', %d)", blockID, isIndex, hg.ID, spacetime.Data, spacetime.Count))
	}
	flag := 0
	if isAnalyzed == 1 {
		flag = statistics.AnalyzeFlag
	}
	sqls = append(sqls, fmt.Sprintf("replace into allegrosql.stats_histograms (block_id, is_index, hist_id, distinct_count, version, null_count, cm_sketch, tot_col_size, stats_ver, flag, correlation) values (%d, %d, %d, %d, %d, %d, X'%X', %d, %d, %d, %f)",
		blockID, isIndex, hg.ID, hg.NDV, version, hg.NullCount, data, hg.TotDefCausSize, statistics.CurStatsVersion, flag, hg.Correlation))
	sqls = append(sqls, fmt.Sprintf("delete from allegrosql.stats_buckets where block_id = %d and is_index = %d and hist_id = %d", blockID, isIndex, hg.ID))
	sc := h.mu.ctx.GetStochastikVars().StmtCtx
	var lastAnalyzePos []byte
	for i := range hg.Buckets {
		count := hg.Buckets[i].Count
		if i > 0 {
			count -= hg.Buckets[i-1].Count
		}
		var upperBound types.Causet
		upperBound, err = hg.GetUpper(i).ConvertTo(sc, types.NewFieldType(allegrosql.TypeBlob))
		if err != nil {
			return
		}
		if i == len(hg.Buckets)-1 {
			lastAnalyzePos = upperBound.GetBytes()
		}
		var lowerBound types.Causet
		lowerBound, err = hg.GetLower(i).ConvertTo(sc, types.NewFieldType(allegrosql.TypeBlob))
		if err != nil {
			return
		}
		sqls = append(sqls, fmt.Sprintf("insert into allegrosql.stats_buckets(block_id, is_index, hist_id, bucket_id, count, repeats, lower_bound, upper_bound) values(%d, %d, %d, %d, %d, %d, X'%X', X'%X')", blockID, isIndex, hg.ID, i, count, hg.Buckets[i].Repeat, lowerBound.GetBytes(), upperBound.GetBytes()))
	}
	if isAnalyzed == 1 && len(lastAnalyzePos) > 0 {
		sqls = append(sqls, fmt.Sprintf("uFIDelate allegrosql.stats_histograms set last_analyze_pos = X'%X' where block_id = %d and is_index = %d and hist_id = %d", lastAnalyzePos, blockID, isIndex, hg.ID))
	}
	return execALLEGROSQLs(context.Background(), exec, sqls)
}
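// SaveStatsToStorage persists per-bucket counts as deltas even though the
// in-memory histogram keeps cumulative counts: for bucket i it writes
// Buckets[i].Count - Buckets[i-1].Count. A worked example with three buckets
// whose cumulative counts are 10, 25, 40:
//
//	bucket 0: stores 10   (10 - 0)
//	bucket 1: stores 15   (25 - 10)
//	bucket 2: stores 15   (40 - 25)
//
// histogramFromStorage below reverses this by re-accumulating totalCount while
// reading the buckets back in bucket_id order.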
// SaveMetaToStorage will save stats_spacetime to storage.
func (h *Handle) SaveMetaToStorage(blockID, count, modifyCount int64) (err error) {
	h.mu.Lock()
	defer h.mu.Unlock()
	ctx := context.TODO()
	exec := h.mu.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate)
	_, err = exec.InterDircute(ctx, "begin")
	if err != nil {
		return errors.Trace(err)
	}
	defer func() {
		err = finishTransaction(ctx, exec, err)
	}()
	txn, err := h.mu.ctx.Txn(true)
	if err != nil {
		return errors.Trace(err)
	}
	var allegrosql string
	version := txn.StartTS()
	allegrosql = fmt.Sprintf("replace into allegrosql.stats_spacetime (version, block_id, count, modify_count) values (%d, %d, %d, %d)", version, blockID, count, modifyCount)
	_, err = exec.InterDircute(ctx, allegrosql)
	return
}

func (h *Handle) histogramFromStorage(reader *statsReader, blockID int64, colID int64, tp *types.FieldType, distinct int64, isIndex int, ver uint64, nullCount int64, totDefCausSize int64, corr float64) (_ *statistics.Histogram, err error) {
	selALLEGROSQL := fmt.Sprintf("select count, repeats, lower_bound, upper_bound from allegrosql.stats_buckets where block_id = %d and is_index = %d and hist_id = %d order by bucket_id", blockID, isIndex, colID)
	rows, fields, err := reader.read(selALLEGROSQL)
	if err != nil {
		return nil, errors.Trace(err)
	}
	bucketSize := len(rows)
	hg := statistics.NewHistogram(colID, distinct, nullCount, ver, tp, bucketSize, totDefCausSize)
	hg.Correlation = corr
	totalCount := int64(0)
	for i := 0; i < bucketSize; i++ {
		count := rows[i].GetInt64(0)
		repeats := rows[i].GetInt64(1)
		var upperBound, lowerBound types.Causet
		if isIndex == 1 {
			lowerBound = rows[i].GetCauset(2, &fields[2].DeferredCauset.FieldType)
			upperBound = rows[i].GetCauset(3, &fields[3].DeferredCauset.FieldType)
		} else {
			sc := &stmtctx.StatementContext{TimeZone: time.UTC}
			d := rows[i].GetCauset(2, &fields[2].DeferredCauset.FieldType)
			lowerBound, err = d.ConvertTo(sc, tp)
			if err != nil {
				return nil, errors.Trace(err)
			}
			d = rows[i].GetCauset(3, &fields[3].DeferredCauset.FieldType)
			upperBound, err = d.ConvertTo(sc, tp)
			if err != nil {
				return nil, errors.Trace(err)
			}
		}
		totalCount += count
		hg.AppendBucket(&lowerBound, &upperBound, totalCount, repeats)
	}
	hg.PreCalculateScalar()
	return hg, nil
}

func (h *Handle) columnCountFromStorage(reader *statsReader, blockID, colID int64) (int64, error) {
	selALLEGROSQL := fmt.Sprintf("select sum(count) from allegrosql.stats_buckets where block_id = %d and is_index = %d and hist_id = %d", blockID, 0, colID)
	rows, _, err := reader.read(selALLEGROSQL)
	if err != nil {
		return 0, errors.Trace(err)
	}
	if rows[0].IsNull(0) {
		return 0, nil
	}
	return rows[0].GetMyDecimal(0).ToInt()
}
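// columnCountFromStorage sums the stored per-bucket deltas, so it equals the
// last bucket's cumulative count without decoding any bounds; SUM() comes back
// as a decimal, hence GetMyDecimal(0).ToInt(), and a NULL sum (no buckets
// persisted yet) is treated as zero. A hedged sketch of how the two readers
// compose (reader and the IDs are placeholders):
//
//	hg, err := h.histogramFromStorage(reader, blockID, colID, tp, ndv, 0, ver, nulls, size, corr)
//	cnt, err2 := h.columnCountFromStorage(reader, blockID, colID)
//	// With both reads in one snapshot, hg.Buckets[len(hg.Buckets)-1].Count == cnt.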
	}
	if err != nil || len(rows) == 0 {
		return
	}
	version = rows[0].GetUint64(0)
	modifyCount = rows[0].GetInt64(1)
	count = rows[0].GetInt64(2)
	return
}

// statsReader is used to simplify code that needs to read system blocks with different sqls
// but within the same transaction.
type statsReader struct {
	ctx     stochastikctx.Context
	history sqlexec.RestrictedALLEGROSQLInterlockingDirectorate
}

func (sr *statsReader) read(allegrosql string) (rows []chunk.Row, fields []*ast.ResultField, err error) {
	if sr.history != nil {
		return sr.history.InterDircRestrictedALLEGROSQLWithSnapshot(allegrosql)
	}
	rc, err := sr.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate).InterDircute(context.TODO(), allegrosql)
	if len(rc) > 0 {
		defer terror.Call(rc[0].Close)
	}
	if err != nil {
		return nil, nil, err
	}
	for {
		req := rc[0].NewChunk()
		err := rc[0].Next(context.TODO(), req)
		if err != nil {
			return nil, nil, err
		}
		if req.NumRows() == 0 {
			break
		}
		for i := 0; i < req.NumRows(); i++ {
			rows = append(rows, req.GetRow(i))
		}
	}
	return rows, rc[0].Fields(), nil
}

func (sr *statsReader) isHistory() bool {
	return sr.history != nil
}

func (h *Handle) getStatsReader(history sqlexec.RestrictedALLEGROSQLInterlockingDirectorate) (*statsReader, error) {
	failpoint.Inject("mockGetStatsReaderFail", func(val failpoint.Value) {
		if val.(bool) {
			failpoint.Return(nil, errors.New("gofail genStatsReader error"))
		}
	})
	if history != nil {
		return &statsReader{history: history}, nil
	}
	h.mu.Lock()
	_, err := h.mu.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate).InterDircute(context.TODO(), "begin")
	if err != nil {
		// Unlock on failure so the mutex is not leaked; callers only invoke
		// releaseStatsReader after a successful acquire.
		h.mu.Unlock()
		return nil, err
	}
	return &statsReader{ctx: h.mu.ctx}, nil
}

func (h *Handle) releaseStatsReader(reader *statsReader) error {
	if reader.history != nil {
		return nil
	}
	_, err := h.mu.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate).InterDircute(context.TODO(), "commit")
	h.mu.Unlock()
	return err
}

const (
	// StatsStatusInited is the status for extended stats which are just registered but have not been analyzed yet.
	StatsStatusInited uint8 = iota
	// StatsStatusAnalyzed is the status for extended stats which have been collected during analyze.
	StatsStatusAnalyzed
	// StatsStatusDeleted is the status for extended stats which were dropped. These "deleted" records are removed from storage by GCStats().
	StatsStatusDeleted
)
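// getStatsReader and releaseStatsReader must always be paired: the non-history
// path takes h.mu and opens a transaction, and only releaseStatsReader commits
// and unlocks. The callers in this file follow a defer pattern like this sketch
// (illustrative only; `err` is the caller's named return value):
//
//	reader, err := h.getStatsReader(nil)
//	if err != nil {
//		return err
//	}
//	defer func() {
//		if err1 := h.releaseStatsReader(reader); err1 != nil && err == nil {
//			err = err1
//		}
//	}()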
// InsertExtendedStats inserts a record into allegrosql.stats_extended and uFIDelates the version in allegrosql.stats_spacetime.
func (h *Handle) InsertExtendedStats(statsName, EDB string, colIDs []int64, tp int, blockID int64, ifNotExists bool) (err error) {
	bytes, err := json.Marshal(colIDs)
	if err != nil {
		return errors.Trace(err)
	}
	strDefCausIDs := string(bytes)
	h.mu.Lock()
	defer h.mu.Unlock()
	ctx := context.TODO()
	exec := h.mu.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate)
	_, err = exec.InterDircute(ctx, "begin pessimistic")
	if err != nil {
		return errors.Trace(err)
	}
	defer func() {
		err = finishTransaction(ctx, exec, err)
	}()
	txn, err := h.mu.ctx.Txn(true)
	if err != nil {
		return errors.Trace(err)
	}
	version := txn.StartTS()
	allegrosql := fmt.Sprintf("INSERT INTO allegrosql.stats_extended(stats_name, EDB, type, block_id, column_ids, version, status) VALUES ('%s', '%s', %d, %d, '%s', %d, %d)", statsName, EDB, tp, blockID, strDefCausIDs, version, StatsStatusInited)
	_, err = exec.InterDircute(ctx, allegrosql)
	// The key exists, but `if not exists` was specified, so we ignore this error.
	if ekv.ErrKeyExists.Equal(err) && ifNotExists {
		err = nil
	}
	return
}

// MarkExtendedStatsDeleted uFIDelates the status in allegrosql.stats_extended to `deleted` and the version in allegrosql.stats_spacetime.
func (h *Handle) MarkExtendedStatsDeleted(statsName, EDB string, blockID int64) (err error) {
	if blockID < 0 {
		allegrosql := fmt.Sprintf("SELECT block_id FROM allegrosql.stats_extended WHERE stats_name = '%s' and EDB = '%s'", statsName, EDB)
		rows, _, err := h.restrictedInterDirc.InterDircRestrictedALLEGROSQL(allegrosql)
		if err != nil {
			return errors.Trace(err)
		}
		if len(rows) == 0 {
			return nil
		}
		blockID = rows[0].GetInt64(0)
	}
	h.mu.Lock()
	defer h.mu.Unlock()
	ctx := context.TODO()
	exec := h.mu.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate)
	_, err = exec.InterDircute(ctx, "begin pessimistic")
	if err != nil {
		return errors.Trace(err)
	}
	defer func() {
		err = finishTransaction(ctx, exec, err)
	}()
	txn, err := h.mu.ctx.Txn(true)
	if err != nil {
		return errors.Trace(err)
	}
	version := txn.StartTS()
	sqls := make([]string, 2)
	sqls[0] = fmt.Sprintf("UFIDelATE allegrosql.stats_extended SET version = %d, status = %d WHERE stats_name = '%s' and EDB = '%s'", version, StatsStatusDeleted, statsName, EDB)
	sqls[1] = fmt.Sprintf("UFIDelATE allegrosql.stats_spacetime SET version = %d WHERE block_id = %d", version, blockID)
	return execALLEGROSQLs(ctx, exec, sqls)
}
// ReloadExtendedStatistics drops the cache for extended statistics and reloads data from allegrosql.stats_extended.
func (h *Handle) ReloadExtendedStatistics() (err error) {
	reader, err := h.getStatsReader(nil)
	if err != nil {
		return err
	}
	// Release the reader on every path; an early return would otherwise leave
	// h.mu locked (see releaseStatsReader).
	defer func() {
		err1 := h.releaseStatsReader(reader)
		if err1 != nil && err == nil {
			err = err1
		}
	}()
	oldCache := h.statsCache.Load().(statsCache)
	blocks := make([]*statistics.Block, 0, len(oldCache.blocks))
	for physicalID, tbl := range oldCache.blocks {
		t, err := h.extendedStatsFromStorage(reader, tbl.Copy(), physicalID, true)
		if err != nil {
			return err
		}
		blocks = append(blocks, t)
	}
	// Note that this uFIDelate may fail when statsCache.version has been modified by others.
	h.uFIDelateStatsCache(oldCache.uFIDelate(blocks, nil, oldCache.version))
	return nil
}

// BuildExtendedStats builds extended stats for column groups if needed, based on the column samples.
func (h *Handle) BuildExtendedStats(blockID int64, defcaus []*perceptron.DeferredCausetInfo, collectors []*statistics.SampleDefCauslector) (*statistics.ExtendedStatsDefCausl, error) {
	allegrosql := fmt.Sprintf("SELECT stats_name, EDB, type, column_ids FROM allegrosql.stats_extended WHERE block_id = %d and status in (%d, %d)", blockID, StatsStatusAnalyzed, StatsStatusInited)
	rows, _, err := h.restrictedInterDirc.InterDircRestrictedALLEGROSQL(allegrosql)
	if err != nil {
		return nil, errors.Trace(err)
	}
	if len(rows) == 0 {
		return nil, nil
	}
	statsDefCausl := statistics.NewExtendedStatsDefCausl()
	for _, event := range rows {
		key := statistics.ExtendedStatsKey{
			StatsName: event.GetString(0),
			EDB:       event.GetString(1),
		}
		item := &statistics.ExtendedStatsItem{Tp: uint8(event.GetInt64(2))}
		colIDs := event.GetString(3)
		err := json.Unmarshal([]byte(colIDs), &item.DefCausIDs)
		if err != nil {
			logutil.BgLogger().Error("invalid column_ids in allegrosql.stats_extended, skip collecting extended stats for this event", zap.String("column_ids", colIDs), zap.Error(err))
			continue
		}
		item = h.fillExtendedStatsItemVals(item, defcaus, collectors)
		if item != nil {
			statsDefCausl.Stats[key] = item
		}
	}
	if len(statsDefCausl.Stats) == 0 {
		return nil, nil
	}
	return statsDefCausl, nil
}

func (h *Handle) fillExtendedStatsItemVals(item *statistics.ExtendedStatsItem, defcaus []*perceptron.DeferredCausetInfo, collectors []*statistics.SampleDefCauslector) *statistics.ExtendedStatsItem {
	switch item.Tp {
	case ast.StatsTypeCardinality, ast.StatsTypeDependency:
		return nil
	case ast.StatsTypeCorrelation:
		return h.fillExtStatsCorrVals(item, defcaus, collectors)
	}
	return nil
}
func (h *Handle) fillExtStatsCorrVals(item *statistics.ExtendedStatsItem, defcaus []*perceptron.DeferredCausetInfo, collectors []*statistics.SampleDefCauslector) *statistics.ExtendedStatsItem {
	colOffsets := make([]int, 0, 2)
	for _, id := range item.DefCausIDs {
		for i, col := range defcaus {
			if col.ID == id {
				colOffsets = append(colOffsets, i)
				break
			}
		}
	}
	if len(colOffsets) != 2 {
		return nil
	}
	// samplesX and samplesY are in order of handle, i.e., their SampleItem.Ordinals are in order.
	samplesX := collectors[colOffsets[0]].Samples
	// We would modify the Ordinal of samplesY, so we make a deep copy.
	samplesY := statistics.CopySampleItems(collectors[colOffsets[1]].Samples)
	sampleNum := len(samplesX)
	if sampleNum == 1 {
		item.ScalarVals = float64(1)
		return item
	}
	h.mu.Lock()
	sc := h.mu.ctx.GetStochastikVars().StmtCtx
	h.mu.Unlock()
	var err error
	samplesX, err = statistics.SortSampleItems(sc, samplesX)
	if err != nil {
		return nil
	}
	samplesYInXOrder := make([]*statistics.SampleItem, sampleNum)
	for i, itemX := range samplesX {
		itemY := samplesY[itemX.Ordinal]
		itemY.Ordinal = i
		samplesYInXOrder[i] = itemY
	}
	samplesYInYOrder, err := statistics.SortSampleItems(sc, samplesYInXOrder)
	if err != nil {
		return nil
	}
	var corrXYSum float64
	for i := 1; i < sampleNum; i++ {
		corrXYSum += float64(i) * float64(samplesYInYOrder[i].Ordinal)
	}
	// X is the ordinal of an item in the original sequence, and Y is the ordinal of the item in the
	// sorted sequence. We know that the X and Y value sets are both:
	//   0, 1, ..., sampleNum-1
	// so we can simply compute sum(X) = sum(Y) = (sampleNum-1)*sampleNum / 2
	// and sum(X^2) = sum(Y^2) = (sampleNum-1)*sampleNum*(2*sampleNum-1) / 6.
	// We use the Pearson correlation coefficient to compute the order correlation of the columns;
	// the formula is based on https://en.wikipedia.org/wiki/Pearson_correlation_coefficient.
	// Note that (itemsCount*corrX2Sum - corrXSum*corrXSum) is never zero when sampleNum is larger than 1.
	itemsCount := float64(sampleNum)
	corrXSum := (itemsCount - 1) * itemsCount / 2.0
	corrX2Sum := (itemsCount - 1) * itemsCount * (2*itemsCount - 1) / 6.0
	item.ScalarVals = (itemsCount*corrXYSum - corrXSum*corrXSum) / (itemsCount*corrX2Sum - corrXSum*corrXSum)
	return item
}
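// A worked example of the closed forms above, for sampleNum = 3:
// corrXSum = 2*3/2 = 3 and corrX2Sum = 2*3*5/6 = 5. If Y is already in X's
// order (samplesYInYOrder[i].Ordinal == i), corrXYSum = 0*0 + 1*1 + 2*2 = 5,
// giving (3*5 - 3*3) / (3*5 - 3*3) = 1. If Y is exactly reversed, corrXYSum =
// 1*1 + 2*0 = 1, giving (3*1 - 9) / (15 - 9) = -1. Intermediate orderings fall
// in between, which is the rank-correlation behavior the optimizer relies on.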
// SaveExtendedStatsToStorage writes extended stats of a causet into allegrosql.stats_extended.
func (h *Handle) SaveExtendedStatsToStorage(blockID int64, extStats *statistics.ExtendedStatsDefCausl, isLoad bool) (err error) {
	if extStats == nil || len(extStats.Stats) == 0 {
		return nil
	}
	h.mu.Lock()
	defer h.mu.Unlock()
	ctx := context.TODO()
	exec := h.mu.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate)
	_, err = exec.InterDircute(ctx, "begin pessimistic")
	if err != nil {
		return errors.Trace(err)
	}
	defer func() {
		err = finishTransaction(ctx, exec, err)
	}()
	txn, err := h.mu.ctx.Txn(true)
	if err != nil {
		return errors.Trace(err)
	}
	version := txn.StartTS()
	sqls := make([]string, 0, 1+len(extStats.Stats))
	for key, item := range extStats.Stats {
		bytes, err := json.Marshal(item.DefCausIDs)
		if err != nil {
			return errors.Trace(err)
		}
		strDefCausIDs := string(bytes)
		switch item.Tp {
		case ast.StatsTypeCardinality, ast.StatsTypeCorrelation:
			// If isLoad is true, it's INSERT; otherwise, it's UFIDelATE.
			sqls = append(sqls, fmt.Sprintf("replace into allegrosql.stats_extended values ('%s', '%s', %d, %d, '%s', %f, null, %d, %d)", key.StatsName, key.EDB, item.Tp, blockID, strDefCausIDs, item.ScalarVals, version, StatsStatusAnalyzed))
		case ast.StatsTypeDependency:
			sqls = append(sqls, fmt.Sprintf("replace into allegrosql.stats_extended values ('%s', '%s', %d, %d, '%s', null, '%s', %d, %d)", key.StatsName, key.EDB, item.Tp, blockID, strDefCausIDs, item.StringVals, version, StatsStatusAnalyzed))
		}
	}
	if !isLoad {
		sqls = append(sqls, fmt.Sprintf("UFIDelATE allegrosql.stats_spacetime SET version = %d WHERE block_id = %d", version, blockID))
	}
	return execALLEGROSQLs(ctx, exec, sqls)
}
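// End-to-end usage sketch (hypothetical wiring, not part of this file's API:
// `ctx` is a stochastikctx.Context, `is` the current schemareplicant.SchemaReplicant,
// and `tblInfo` a *perceptron.TableInfo):
//
//	h := NewHandle(ctx, 3*time.Second)
//	if err := h.UFIDelate(is); err != nil {  // pull stats_spacetime deltas
//		logutil.BgLogger().Error("uFIDelate stats", zap.Error(err))
//	}
//	tbl := h.GetTableStats(tblInfo)         // lock-free cache read
//	_ = h.LoadNeededHistograms()            // lazily fill column histograms
//	h.FlushStats()                          // push deltas/feedback back to KV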