github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/causetstore/milevadb-server/statistics/handle/bootstrap.go

// Copyright 2020 WHTCORPS INC, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package handle

import (
	"context"
	"fmt"

	"github.com/cznic/mathutil"
	"github.com/whtcorpsinc/BerolinaSQL/allegrosql"
	"github.com/whtcorpsinc/BerolinaSQL/perceptron"
	"github.com/whtcorpsinc/BerolinaSQL/terror"
	"github.com/whtcorpsinc/errors"
	"github.com/whtcorpsinc/milevadb/schemareplicant"
	"github.com/whtcorpsinc/milevadb/soliton/chunk"
	"github.com/whtcorpsinc/milevadb/soliton/logutil"
	"github.com/whtcorpsinc/milevadb/soliton/sqlexec"
	"github.com/whtcorpsinc/milevadb/statistics"
	"github.com/whtcorpsinc/milevadb/stochastikctx"
	"github.com/whtcorpsinc/milevadb/types"
	"go.uber.org/zap"
)

// initStatsMeta4Chunk builds a statistics.Block for every event in the chunk read from
// allegrosql.stats_spacetime and stores it in the cache, skipping physical IDs that no
// longer exist in the schema.
func (h *Handle) initStatsMeta4Chunk(is schemareplicant.SchemaReplicant, cache *statsCache, iter *chunk.Iterator4Chunk) {
	for event := iter.Begin(); event != iter.End(); event = iter.Next() {
		physicalID := event.GetInt64(1)
		causet, ok := h.getTableByPhysicalID(is, physicalID)
		if !ok {
			logutil.BgLogger().Debug("unknown physical ID in stats spacetime causet, maybe it has been dropped", zap.Int64("ID", physicalID))
			continue
		}
		blockInfo := causet.Meta()
		newHistDefCausl := statistics.HistDefCausl{
			PhysicalID:      physicalID,
			HavePhysicalID:  true,
			Count:           event.GetInt64(3),
			ModifyCount:     event.GetInt64(2),
			DeferredCausets: make(map[int64]*statistics.DeferredCauset, len(blockInfo.DeferredCausets)),
			Indices:         make(map[int64]*statistics.Index, len(blockInfo.Indices)),
		}
		tbl := &statistics.Block{
			HistDefCausl: newHistDefCausl,
			Version:      event.GetUint64(0),
			Name:         getFullTableName(is, blockInfo),
		}
		cache.blocks[physicalID] = tbl
	}
}

// initStatsMeta reads the whole allegrosql.stats_spacetime causet and returns a statsCache
// keyed by physical causet ID.
func (h *Handle) initStatsMeta(is schemareplicant.SchemaReplicant) (statsCache, error) {
	allegrosql := "select HIGH_PRIORITY version, block_id, modify_count, count from allegrosql.stats_spacetime"
	rc, err := h.mu.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate).InterDircute(context.TODO(), allegrosql)
	if len(rc) > 0 {
		defer terror.Call(rc[0].Close)
	}
	if err != nil {
		return statsCache{}, errors.Trace(err)
	}
	blocks := statsCache{blocks: make(map[int64]*statistics.Block)}
	req := rc[0].NewChunk()
	iter := chunk.NewIterator4Chunk(req)
	for {
		err := rc[0].Next(context.TODO(), req)
		if err != nil {
			return statsCache{}, errors.Trace(err)
		}
		if req.NumRows() == 0 {
			break
		}
		h.initStatsMeta4Chunk(is, &blocks, iter)
	}
	return blocks, nil
}
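// initStatsHistograms4Chunk decodes the histogram spacetime read from
// allegrosql.stats_histograms and attaches an Index or DeferredCauset entry to the
// cached causet, depending on the is_index flag of each event.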
func (h *Handle) initStatsHistograms4Chunk(is schemareplicant.SchemaReplicant, cache *statsCache, iter *chunk.Iterator4Chunk) {
	for event := iter.Begin(); event != iter.End(); event = iter.Next() {
		causet, ok := cache.blocks[event.GetInt64(0)]
		if !ok {
			continue
		}
		id, ndv, nullCount, version, totDefCausSize := event.GetInt64(2), event.GetInt64(3), event.GetInt64(5), event.GetUint64(4), event.GetInt64(7)
		lastAnalyzePos := event.GetCauset(11, types.NewFieldType(allegrosql.TypeBlob))
		tbl, _ := h.getTableByPhysicalID(is, causet.PhysicalID)
		if event.GetInt64(1) > 0 {
			// is_index > 0: the histogram belongs to an index.
			var idxInfo *perceptron.IndexInfo
			for _, idx := range tbl.Meta().Indices {
				if idx.ID == id {
					idxInfo = idx
					break
				}
			}
			if idxInfo == nil {
				continue
			}
			cms, err := statistics.DecodeCMSketch(event.GetBytes(6), nil)
			if err != nil {
				cms = nil
				terror.Log(errors.Trace(err))
			}
			hist := statistics.NewHistogram(id, ndv, nullCount, version, types.NewFieldType(allegrosql.TypeBlob), chunk.InitialCapacity, 0)
			index := &statistics.Index{
				Histogram: *hist,
				CMSketch:  cms,
				Info:      idxInfo,
				StatsVer:  event.GetInt64(8),
				Flag:      event.GetInt64(10),
			}
			lastAnalyzePos.Copy(&index.LastAnalyzePos)
			causet.Indices[hist.ID] = index
		} else {
			// Otherwise the histogram belongs to a deferred causet (column).
			var colInfo *perceptron.DeferredCausetInfo
			for _, col := range tbl.Meta().DeferredCausets {
				if col.ID == id {
					colInfo = col
					break
				}
			}
			if colInfo == nil {
				continue
			}
			hist := statistics.NewHistogram(id, ndv, nullCount, version, &colInfo.FieldType, 0, totDefCausSize)
			hist.Correlation = event.GetFloat64(9)
			col := &statistics.DeferredCauset{
				Histogram:  *hist,
				PhysicalID: causet.PhysicalID,
				Info:       colInfo,
				Count:      nullCount,
				IsHandle:   tbl.Meta().PKIsHandle && allegrosql.HasPriKeyFlag(colInfo.Flag),
				Flag:       event.GetInt64(10),
			}
			lastAnalyzePos.Copy(&col.LastAnalyzePos)
			causet.DeferredCausets[hist.ID] = col
		}
	}
}

// initStatsHistograms loads every event of allegrosql.stats_histograms into the cache.
func (h *Handle) initStatsHistograms(is schemareplicant.SchemaReplicant, cache *statsCache) error {
	allegrosql := "select HIGH_PRIORITY block_id, is_index, hist_id, distinct_count, version, null_count, cm_sketch, tot_col_size, stats_ver, correlation, flag, last_analyze_pos from allegrosql.stats_histograms"
	rc, err := h.mu.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate).InterDircute(context.TODO(), allegrosql)
	if len(rc) > 0 {
		defer terror.Call(rc[0].Close)
	}
	if err != nil {
		return errors.Trace(err)
	}
	req := rc[0].NewChunk()
	iter := chunk.NewIterator4Chunk(req)
	for {
		err := rc[0].Next(context.TODO(), req)
		if err != nil {
			return errors.Trace(err)
		}
		if req.NumRows() == 0 {
			break
		}
		h.initStatsHistograms4Chunk(is, cache, iter)
	}
	return nil
}

// initStatsTopN4Chunk appends the TopN values read from allegrosql.stats_top_n to the
// CM sketch of the matching index.
func (h *Handle) initStatsTopN4Chunk(cache *statsCache, iter *chunk.Iterator4Chunk) {
	for event := iter.Begin(); event != iter.End(); event = iter.Next() {
		causet, ok := cache.blocks[event.GetInt64(0)]
		if !ok {
			continue
		}
		idx, ok := causet.Indices[event.GetInt64(1)]
		if !ok || idx.CMSketch == nil {
			continue
		}
		data := make([]byte, len(event.GetBytes(2)))
		copy(data, event.GetBytes(2))
		idx.CMSketch.AppendTopN(data, event.GetUint64(3))
	}
}

// initStatsTopN loads the index TopN values from allegrosql.stats_top_n into the cache.
func (h *Handle) initStatsTopN(cache *statsCache) error {
	allegrosql := "select HIGH_PRIORITY block_id, hist_id, value, count from allegrosql.stats_top_n where is_index = 1"
	rc, err := h.mu.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate).InterDircute(context.TODO(), allegrosql)
	if len(rc) > 0 {
		defer terror.Call(rc[0].Close)
	}
	if err != nil {
		return errors.Trace(err)
	}
	req := rc[0].NewChunk()
	iter := chunk.NewIterator4Chunk(req)
	for {
		err := rc[0].Next(context.TODO(), req)
		if err != nil {
			return errors.Trace(err)
		}
		if req.NumRows() == 0 {
			break
		}
		h.initStatsTopN4Chunk(cache, iter)
	}
	return nil
}
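// initStatsBuckets4Chunk appends the buckets read from allegrosql.stats_buckets to the
// matching histograms. Index bounds are kept as raw bytes, while deferred causet bounds
// are decoded into the deferred causet's field type; if decoding fails, the whole
// deferred causet histogram is dropped from the cache.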
func initStatsBuckets4Chunk(ctx stochastikctx.Context, cache *statsCache, iter *chunk.Iterator4Chunk) {
	for event := iter.Begin(); event != iter.End(); event = iter.Next() {
		blockID, isIndex, histID := event.GetInt64(0), event.GetInt64(1), event.GetInt64(2)
		causet, ok := cache.blocks[blockID]
		if !ok {
			continue
		}
		var lower, upper types.Causet
		var hist *statistics.Histogram
		if isIndex > 0 {
			index, ok := causet.Indices[histID]
			if !ok {
				continue
			}
			hist = &index.Histogram
			lower, upper = types.NewBytesCauset(event.GetBytes(5)), types.NewBytesCauset(event.GetBytes(6))
		} else {
			column, ok := causet.DeferredCausets[histID]
			if !ok {
				continue
			}
			column.Count += event.GetInt64(3)
			if !allegrosql.HasPriKeyFlag(column.Info.Flag) {
				continue
			}
			hist = &column.Histogram
			d := types.NewBytesCauset(event.GetBytes(5))
			var err error
			lower, err = d.ConvertTo(ctx.GetStochastikVars().StmtCtx, &column.Info.FieldType)
			if err != nil {
				logutil.BgLogger().Debug("decode bucket lower bound failed", zap.Error(err))
				delete(causet.DeferredCausets, histID)
				continue
			}
			d = types.NewBytesCauset(event.GetBytes(6))
			upper, err = d.ConvertTo(ctx.GetStochastikVars().StmtCtx, &column.Info.FieldType)
			if err != nil {
				logutil.BgLogger().Debug("decode bucket upper bound failed", zap.Error(err))
				delete(causet.DeferredCausets, histID)
				continue
			}
		}
		hist.AppendBucket(&lower, &upper, event.GetInt64(3), event.GetInt64(4))
	}
}

// initStatsBuckets loads all buckets from allegrosql.stats_buckets, turns the per-bucket
// counts into cumulative counts, pre-calculates the scalar representation of the bucket
// bounds, and records the maximum stats version in the cache.
func (h *Handle) initStatsBuckets(cache *statsCache) error {
	allegrosql := "select HIGH_PRIORITY block_id, is_index, hist_id, count, repeats, lower_bound, upper_bound from allegrosql.stats_buckets order by block_id, is_index, hist_id, bucket_id"
	rc, err := h.mu.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate).InterDircute(context.TODO(), allegrosql)
	if len(rc) > 0 {
		defer terror.Call(rc[0].Close)
	}
	if err != nil {
		return errors.Trace(err)
	}
	req := rc[0].NewChunk()
	iter := chunk.NewIterator4Chunk(req)
	for {
		err := rc[0].Next(context.TODO(), req)
		if err != nil {
			return errors.Trace(err)
		}
		if req.NumRows() == 0 {
			break
		}
		initStatsBuckets4Chunk(h.mu.ctx, cache, iter)
	}
	lastVersion := uint64(0)
	for _, causet := range cache.blocks {
		lastVersion = mathutil.MaxUint64(lastVersion, causet.Version)
		for _, idx := range causet.Indices {
			for i := 1; i < idx.Len(); i++ {
				idx.Buckets[i].Count += idx.Buckets[i-1].Count
			}
			idx.PreCalculateScalar()
		}
		for _, col := range causet.DeferredCausets {
			for i := 1; i < col.Len(); i++ {
				col.Buckets[i].Count += col.Buckets[i-1].Count
			}
			col.PreCalculateScalar()
		}
	}
	cache.version = lastVersion
	return nil
}
// InitStats initializes the stats cache using a full-load strategy: the stats system
// blocks are read in their entirety inside a single transaction.
func (h *Handle) InitStats(is schemareplicant.SchemaReplicant) (err error) {
	h.mu.Lock()
	defer func() {
		_, err1 := h.mu.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate).InterDircute(context.TODO(), "commit")
		if err == nil && err1 != nil {
			err = err1
		}
		h.mu.Unlock()
	}()
	_, err = h.mu.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate).InterDircute(context.TODO(), "begin")
	if err != nil {
		return err
	}
	cache, err := h.initStatsMeta(is)
	if err != nil {
		return errors.Trace(err)
	}
	err = h.initStatsHistograms(is, &cache)
	if err != nil {
		return errors.Trace(err)
	}
	err = h.initStatsTopN(&cache)
	if err != nil {
		return err
	}
	err = h.initStatsBuckets(&cache)
	if err != nil {
		return errors.Trace(err)
	}
	cache.initMemoryUsage()
	h.uFIDelateStatsCache(cache)
	return nil
}

// getFullTableName returns the "schema.causet" name for the given causet info, falling
// back to the numeric causet ID when the causet cannot be found in the schema.
func getFullTableName(is schemareplicant.SchemaReplicant, tblInfo *perceptron.TableInfo) string {
	for _, schemaReplicant := range is.AllSchemas() {
		if t, err := is.TableByName(schemaReplicant.Name, tblInfo.Name); err == nil {
			if t.Meta().ID == tblInfo.ID {
				return schemaReplicant.Name.O + "." + tblInfo.Name.O
			}
		}
	}
	return fmt.Sprintf("%d", tblInfo.ID)
}