github.com/dolthub/go-mysql-server@v0.18.0/sql/statistics.go (about) 1 // Copyright 2022 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package sql 16 17 import ( 18 "fmt" 19 "strings" 20 "time" 21 ) 22 23 // StatisticsTable is a table that can provide information about its number of rows and other facts to improve query 24 // planning performance. 25 type StatisticsTable interface { 26 Table 27 // DataLength returns the length of the data file (varies by engine). 28 DataLength(ctx *Context) (uint64, error) 29 // RowCount returns the row count for this table and whether the count is exact 30 RowCount(ctx *Context) (uint64, bool, error) 31 } 32 33 // StatsProvider is a catalog extension for databases that can 34 // build and provide index statistics. 35 type StatsProvider interface { 36 // GetTableStats returns all statistics for the table 37 GetTableStats(ctx *Context, db, table string) ([]Statistic, error) 38 // RefreshTableStats updates all statistics associated with a given table 39 RefreshTableStats(ctx *Context, table Table, db string) error 40 // SetStats updates or overwrites a set of table statistics 41 SetStats(ctx *Context, stats Statistic) error 42 // GetStats fetches a set of statistics for a set of table columns 43 GetStats(ctx *Context, qual StatQualifier, cols []string) (Statistic, bool) 44 // DropStats deletes a set of column statistics 45 DropStats(ctx *Context, qual StatQualifier, cols []string) error 46 // DropAllStats deletes all database statistics 47 DropDbStats(ctx *Context, db string, flush bool) error 48 // RowCount returns the number of rows in a table 49 RowCount(ctx *Context, db, table string) (uint64, error) 50 // DataLength returns the estimated size of each row in the table 51 DataLength(ctx *Context, db, table string) (uint64, error) 52 } 53 54 type IndexClass uint8 55 56 const ( 57 IndexClassDefault = iota 58 IndexClassSpatial 59 IndexClassFulltext 60 ) 61 62 // Statistic is the top-level interface for accessing cardinality and 63 // costing estimates for an index prefix. 64 type Statistic interface { 65 JSONWrapper 66 MutableStatistic 67 RowCount() uint64 68 DistinctCount() uint64 69 NullCount() uint64 70 AvgSize() uint64 71 CreatedAt() time.Time 72 Columns() []string 73 Types() []Type 74 Qualifier() StatQualifier 75 Histogram() Histogram 76 IndexClass() IndexClass 77 FuncDeps() *FuncDepSet 78 ColSet() ColSet 79 LowerBound() Row 80 } 81 82 type MutableStatistic interface { 83 WithColSet(ColSet) Statistic 84 WithFuncDeps(*FuncDepSet) Statistic 85 WithHistogram(Histogram) (Statistic, error) 86 WithDistinctCount(uint64) Statistic 87 WithRowCount(uint64) Statistic 88 WithNullCount(uint64) Statistic 89 WithAvgSize(uint64) Statistic 90 WithLowerBound(Row) Statistic 91 } 92 93 func NewQualifierFromString(q string) (StatQualifier, error) { 94 parts := strings.Split(q, ".") 95 if len(parts) < 3 { 96 return StatQualifier{}, fmt.Errorf("invalid qualifier string: '%s', expected '<database>.<table>.<index>'", q) 97 } 98 return StatQualifier{Database: parts[0], Tab: parts[1], Idx: parts[2]}, nil 99 } 100 101 func NewStatQualifier(db, table, index string) StatQualifier { 102 return StatQualifier{Database: db, Tab: table, Idx: index} 103 } 104 105 // StatQualifier is the namespace hierarchy for a given statistic. 106 // The qualifier and set of columns completely describes a unique stat. 107 type StatQualifier struct { 108 Database string `json:"database"` 109 Tab string `json:"table"` 110 Idx string `json:"index"` 111 } 112 113 func (q StatQualifier) String() string { 114 if q.Idx != "" { 115 return fmt.Sprintf("%s.%s.%s", q.Database, q.Tab, q.Idx) 116 } 117 return fmt.Sprintf("%s.%s", q.Database, q.Tab) 118 } 119 120 func (q StatQualifier) Empty() bool { 121 return q.Idx == "" || q.Tab == "" || q.Database == "" 122 } 123 124 func (q StatQualifier) Db() string { 125 return q.Database 126 } 127 128 func (q StatQualifier) Table() string { 129 return q.Tab 130 } 131 132 func (q StatQualifier) Index() string { 133 return q.Idx 134 } 135 136 // Histogram is a collection of non-overlapping buckets that 137 // estimate the costing statistics for an index prefix. 138 // Note that a non-unique key can cross bucket boundaries. 139 type Histogram []HistogramBucket 140 141 func (h Histogram) IsEmpty() bool { 142 return len(h) == 0 143 } 144 145 func (h Histogram) ToInterface() interface{} { 146 ret := make([]interface{}, len(h)) 147 for i, b := range h { 148 var upperBound Row 149 for _, v := range b.UpperBound() { 150 upperBound = append(upperBound, v) 151 } 152 mcvs := make([]Row, len(b.Mcvs())) 153 for i, mcv := range b.Mcvs() { 154 var row Row 155 for _, v := range mcv { 156 row = append(row, v) 157 } 158 mcvs[i] = row 159 } 160 ret[i] = map[string]interface{}{ 161 "row_count": b.RowCount(), 162 "null_count": b.NullCount(), 163 "distinct_count": b.DistinctCount(), 164 "bound_count": b.BoundCount(), 165 "mcv_counts": b.McvCounts(), 166 "mcvs": mcvs, 167 "upper_bound": upperBound, 168 } 169 } 170 return ret 171 } 172 173 func (h Histogram) DebugString() string { 174 var bounds []string 175 var cnts []int 176 var allCnt int 177 for _, bucket := range h { 178 cnt := int(bucket.RowCount()) 179 var key []string 180 for _, v := range bucket.UpperBound() { 181 key = append(key, fmt.Sprintf("%v", v)) 182 } 183 bounds = append(bounds, strings.Join(key, ",")) 184 allCnt += cnt 185 cnts = append(cnts, cnt) 186 } 187 188 flatten := 50 / float64(allCnt) 189 b := strings.Builder{} 190 b.WriteString("histogram:\n") 191 for j, bound := range bounds { 192 b.WriteString(bound + ": ") 193 for i := 0; i < int(float64(cnts[j])*flatten); i++ { 194 b.WriteString("*") 195 } 196 fmt.Fprintf(&b, "(%d)\n", cnts[j]) 197 } 198 return b.String() 199 } 200 201 // HistogramBucket contains statistics for a fragment of an 202 // index's keyspace. 203 type HistogramBucket interface { 204 RowCount() uint64 205 DistinctCount() uint64 206 NullCount() uint64 207 BoundCount() uint64 208 UpperBound() Row 209 McvCounts() []uint64 210 Mcvs() []Row 211 } 212 213 // JSONWrapper is an integrator specific implementation of a JSON field value. 214 // The query engine can utilize these optimized access methods improve performance 215 // by minimizing the need to unmarshall a JSONWrapper into a JSONDocument. 216 type JSONWrapper interface { 217 // ToInterface converts a JSONWrapper to an interface{} of simple types 218 ToInterface() interface{} 219 }