github.com/dolthub/go-mysql-server@v0.18.0/sql/stats/statistic.go (about) 1 // Copyright 2023 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package stats 16 17 // This is a presentation layer package. Custom implementations converge here 18 // as a conversion between SQL inputs/outputs. These do not add anything to the 19 // interfaces defined in |sql|, but the separation is necessary for import conflicts. 20 21 import ( 22 "fmt" 23 "regexp" 24 "time" 25 26 "github.com/dolthub/vitess/go/vt/sqlparser" 27 28 "github.com/dolthub/go-mysql-server/sql" 29 "github.com/dolthub/go-mysql-server/sql/types" 30 ) 31 32 func NewStatistic(rowCount, distinctCount, nullCount, avgSize uint64, createdAt time.Time, qualifier sql.StatQualifier, columns []string, types []sql.Type, histogram []*Bucket, class sql.IndexClass, lowerBound sql.Row) *Statistic { 33 return &Statistic{ 34 RowCnt: rowCount, 35 DistinctCnt: distinctCount, 36 NullCnt: nullCount, 37 AvgRowSize: avgSize, 38 Created: createdAt, 39 Qual: qualifier, 40 Cols: columns, 41 Typs: types, 42 Hist: histogram, 43 IdxClass: uint8(class), 44 LowerBnd: lowerBound, 45 } 46 } 47 48 type Statistic struct { 49 RowCnt uint64 `json:"row_count"` 50 DistinctCnt uint64 `json:"distinct_count"` 51 NullCnt uint64 `json:"null_count"` 52 AvgRowSize uint64 `json:"avg_size"` 53 Created time.Time `json:"created_at"` 54 Qual sql.StatQualifier `json:"qualifier"` 55 Cols []string `json:"columns"` 56 Typs []sql.Type `json:"-"` 57 Hist []*Bucket `json:"buckets"` 58 IdxClass uint8 `json:"index_class"` 59 LowerBnd sql.Row `json:"lower_bound"` 60 fds *sql.FuncDepSet `json:"-"` 61 colSet sql.ColSet `json:"-"` 62 } 63 64 var _ sql.JSONWrapper = (*Statistic)(nil) 65 var _ sql.Statistic = (*Statistic)(nil) 66 67 func (s *Statistic) FuncDeps() *sql.FuncDepSet { 68 return s.fds 69 } 70 71 func (s *Statistic) WithFuncDeps(fds *sql.FuncDepSet) sql.Statistic { 72 ret := *s 73 ret.fds = fds 74 return &ret 75 } 76 77 func (s *Statistic) LowerBound() sql.Row { 78 return s.LowerBnd 79 } 80 81 func (s *Statistic) ColSet() sql.ColSet { 82 return s.colSet 83 } 84 85 func (s *Statistic) WithColSet(cols sql.ColSet) sql.Statistic { 86 ret := *s 87 ret.colSet = cols 88 return &ret 89 } 90 91 func (s *Statistic) SetTypes(t []sql.Type) { 92 s.Typs = t 93 } 94 95 func (s *Statistic) SetColumns(c []string) { 96 s.Cols = c 97 } 98 99 func (s *Statistic) SetQualifier(q sql.StatQualifier) { 100 s.Qual = q 101 } 102 103 func (s *Statistic) RowCount() uint64 { 104 return s.RowCnt 105 } 106 107 func (s *Statistic) DistinctCount() uint64 { 108 return s.DistinctCnt 109 } 110 111 func (s *Statistic) NullCount() uint64 { 112 return s.NullCnt 113 } 114 115 func (s *Statistic) AvgSize() uint64 { 116 return s.AvgRowSize 117 } 118 119 func (s *Statistic) CreatedAt() time.Time { 120 return s.Created 121 } 122 123 func (s *Statistic) Columns() []string { 124 return s.Cols 125 } 126 127 func (s *Statistic) Qualifier() sql.StatQualifier { 128 return s.Qual 129 } 130 131 func (s *Statistic) Types() []sql.Type { 132 return s.Typs 133 } 134 135 func (s *Statistic) Histogram() sql.Histogram { 136 buckets := make([]sql.HistogramBucket, len(s.Hist)) 137 for i, b := range s.Hist { 138 buckets[i] = b 139 } 140 return buckets 141 } 142 143 func (s *Statistic) WithDistinctCount(i uint64) sql.Statistic { 144 ret := *s 145 ret.DistinctCnt = i 146 return &ret 147 } 148 149 func (s *Statistic) WithRowCount(i uint64) sql.Statistic { 150 ret := *s 151 ret.RowCnt = i 152 return &ret 153 } 154 155 func (s *Statistic) WithNullCount(i uint64) sql.Statistic { 156 ret := *s 157 ret.NullCnt = i 158 return &ret 159 } 160 161 func (s *Statistic) WithAvgSize(i uint64) sql.Statistic { 162 ret := *s 163 ret.AvgRowSize = i 164 return &ret 165 } 166 167 func (s *Statistic) WithLowerBound(r sql.Row) sql.Statistic { 168 ret := *s 169 ret.LowerBnd = r 170 return &ret 171 } 172 173 func (s *Statistic) WithHistogram(h sql.Histogram) (sql.Statistic, error) { 174 ret := *s 175 ret.Hist = nil 176 for _, b := range h { 177 sqlB, ok := b.(*Bucket) 178 if !ok { 179 return nil, fmt.Errorf("invalid bucket type: %T", b) 180 } 181 ret.Hist = append(ret.Hist, sqlB) 182 } 183 return &ret, nil 184 } 185 186 func (s *Statistic) IndexClass() sql.IndexClass { 187 return sql.IndexClass(s.IdxClass) 188 } 189 190 func (s *Statistic) ToInterface() interface{} { 191 typs := make([]string, len(s.Typs)) 192 for i, t := range s.Typs { 193 typs[i] = t.String() 194 } 195 return map[string]interface{}{ 196 "statistic": map[string]interface{}{ 197 "row_count": s.RowCount(), 198 "null_count": s.RowCount(), 199 "distinct_count": s.DistinctCount(), 200 "avg_size": s.AvgSize(), 201 "created_at": s.CreatedAt(), 202 "qualifier": s.Qualifier().String(), 203 "columns": s.Columns(), 204 "types:": typs, 205 "buckets": s.Histogram().ToInterface(), 206 }, 207 } 208 } 209 210 func ParseTypeStrings(typs []string) ([]sql.Type, error) { 211 if len(typs) == 0 { 212 return nil, nil 213 } 214 ret := make([]sql.Type, len(typs)) 215 var err error 216 typRegex := regexp.MustCompile("([a-z]+)\\((\\d+)\\)") 217 for i, typ := range typs { 218 typMatch := typRegex.FindStringSubmatch(typ) 219 colType := &sqlparser.ColumnType{} 220 if typMatch == nil { 221 colType.Type = typ 222 } else { 223 colType.Type = typMatch[1] 224 if len(typMatch) > 2 { 225 colType.Length = &sqlparser.SQLVal{Val: []byte(typMatch[2]), Type: sqlparser.IntVal} 226 } 227 } 228 ret[i], err = types.ColumnTypeToType(colType) 229 if err != nil { 230 return nil, fmt.Errorf("failed to parse histogram type: %s", typMatch) 231 } 232 } 233 return ret, nil 234 } 235 236 func NewHistogramBucket(rowCount, distinctCount, nullCount, boundCount uint64, boundValue sql.Row, mcvCounts []uint64, mcvs []sql.Row) *Bucket { 237 return &Bucket{ 238 RowCnt: rowCount, 239 DistinctCnt: distinctCount, 240 NullCnt: nullCount, 241 McvsCnt: mcvCounts, 242 BoundCnt: boundCount, 243 BoundVal: boundValue, 244 McvVals: mcvs, 245 } 246 } 247 248 type Bucket struct { 249 RowCnt uint64 `json:"row_count"` 250 DistinctCnt uint64 `json:"distinct_count"` 251 NullCnt uint64 `json:"null_count"` 252 McvsCnt []uint64 `json:"mcv_counts"` 253 BoundCnt uint64 `json:"bound_count"` 254 BoundVal sql.Row `json:"upper_bound"` 255 McvVals []sql.Row `json:"mcvs"` 256 } 257 258 var _ sql.HistogramBucket = (*Bucket)(nil) 259 260 func (b Bucket) RowCount() uint64 { 261 return b.RowCnt 262 } 263 264 func (b Bucket) DistinctCount() uint64 { 265 return b.DistinctCnt 266 } 267 268 func (b Bucket) NullCount() uint64 { 269 return b.NullCnt 270 } 271 272 func (b Bucket) BoundCount() uint64 { 273 return b.BoundCnt 274 } 275 276 func (b Bucket) UpperBound() sql.Row { 277 return b.BoundVal 278 } 279 280 func (b Bucket) McvCounts() []uint64 { 281 return b.McvsCnt 282 } 283 284 func (b Bucket) Mcvs() []sql.Row { 285 return b.McvVals 286 }