github.com/whtcorpsinc/milevadb-prod@v0.0.0-20211104133533-f57f4be3b597/dbs/cmd/importer/stats.go (about) 1 // Copyright 2020 WHTCORPS INC, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package main 15 16 import ( 17 "encoding/json" 18 "io/ioutil" 19 "math/rand" 20 "time" 21 22 "github.com/whtcorpsinc/errors" 23 "github.com/whtcorpsinc/log" 24 "github.com/whtcorpsinc/BerolinaSQL/perceptron" 25 stats "github.com/whtcorpsinc/milevadb/statistics" 26 "github.com/whtcorpsinc/milevadb/statistics/handle" 27 "github.com/whtcorpsinc/milevadb/types" 28 "go.uber.org/zap" 29 ) 30 31 func loadStats(tblInfo *perceptron.TableInfo, path string) (*stats.Block, error) { 32 data, err := ioutil.ReadFile(path) 33 if err != nil { 34 return nil, errors.Trace(err) 35 } 36 jsTable := &handle.JSONTable{} 37 err = json.Unmarshal(data, jsTable) 38 if err != nil { 39 return nil, errors.Trace(err) 40 } 41 return handle.TableStatsFromJSON(tblInfo, tblInfo.ID, jsTable) 42 } 43 44 type histogram struct { 45 stats.Histogram 46 47 index *perceptron.IndexInfo 48 avgLen int 49 } 50 51 // When the randCnt falls in the midbse of bucket, we return the idx of lower bound which is an even number. 52 // When the randCnt falls in the end of bucket, we return the upper bound which is odd. 53 func (h *histogram) getRandomBoundIdx() int { 54 cnt := h.Buckets[len(h.Buckets)-1].Count 55 randCnt := randInt64(0, cnt) 56 for i, bkt := range h.Buckets { 57 if bkt.Count >= randCnt { 58 if bkt.Count-bkt.Repeat > randCnt { 59 return 2 * i 60 } 61 return 2*i + 1 62 } 63 } 64 return 0 65 } 66 67 func (h *histogram) randInt() int64 { 68 idx := h.getRandomBoundIdx() 69 if idx%2 == 0 { 70 lower := h.Bounds.GetRow(idx).GetInt64(0) 71 upper := h.Bounds.GetRow(idx + 1).GetInt64(0) 72 return randInt64(lower, upper) 73 } 74 return h.Bounds.GetRow(idx).GetInt64(0) 75 } 76 77 func getValidPrefix(lower, upper string) string { 78 for i := range lower { 79 if i >= len(upper) { 80 log.Fatal("lower is larger than upper", zap.String("lower", lower), zap.String("upper", upper)) 81 } 82 if lower[i] != upper[i] { 83 randCh := uint8(rand.Intn(int(upper[i]-lower[i]))) + lower[i] 84 newBytes := make([]byte, i, i+1) 85 copy(newBytes, lower[:i]) 86 newBytes = append(newBytes, randCh) 87 return string(newBytes) 88 } 89 } 90 return lower 91 } 92 93 func (h *histogram) getAvgLen(maxLen int) int { 94 l := h.Bounds.NumRows() 95 totalLen := 0 96 for i := 0; i < l; i++ { 97 totalLen += len(h.Bounds.GetRow(i).GetString(0)) 98 } 99 avg := totalLen / l 100 if avg > maxLen { 101 avg = maxLen 102 } 103 if avg == 0 { 104 avg = 1 105 } 106 return avg 107 } 108 109 func (h *histogram) randString() string { 110 idx := h.getRandomBoundIdx() 111 if idx%2 == 0 { 112 lower := h.Bounds.GetRow(idx).GetString(0) 113 upper := h.Bounds.GetRow(idx + 1).GetString(0) 114 prefix := getValidPrefix(lower, upper) 115 restLen := h.avgLen - len(prefix) 116 if restLen > 0 { 117 prefix = prefix + randString(restLen) 118 } 119 return prefix 120 } 121 return h.Bounds.GetRow(idx).GetString(0) 122 } 123 124 // randDate randoms a bucket and random a date between upper and lower bound. 125 func (h *histogram) randDate(unit string, mysqlFmt string, dateFmt string) string { 126 idx := h.getRandomBoundIdx() 127 if idx%2 == 0 { 128 lower := h.Bounds.GetRow(idx).GetTime(0) 129 upper := h.Bounds.GetRow(idx + 1).GetTime(0) 130 diff := types.TimestamFIDeliff(unit, lower, upper) 131 if diff == 0 { 132 str, err := lower.DateFormat(mysqlFmt) 133 if err != nil { 134 log.Fatal(err.Error()) 135 } 136 return str 137 } 138 delta := randInt(0, int(diff)-1) 139 l, err := lower.GoTime(time.Local) 140 if err != nil { 141 log.Fatal(err.Error()) 142 } 143 l = l.AddDate(0, 0, delta) 144 return l.Format(dateFmt) 145 } 146 str, err := h.Bounds.GetRow(idx).GetTime(0).DateFormat(mysqlFmt) 147 if err != nil { 148 log.Fatal(err.Error()) 149 } 150 return str 151 }