github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/sqle/statspro/dolt_stats.go (about) 1 // Copyright 2024 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package statspro 16 17 import ( 18 "fmt" 19 "sync" 20 "time" 21 22 "github.com/dolthub/go-mysql-server/sql" 23 "github.com/dolthub/go-mysql-server/sql/stats" 24 25 "github.com/dolthub/dolt/go/store/hash" 26 ) 27 28 type DoltStats struct { 29 Statistic *stats.Statistic 30 mu *sync.Mutex 31 // Chunks is a list of addresses for the histogram fanout level 32 Chunks []hash.Hash 33 // Active maps a chunk/bucket address to its position in 34 // the histogram. 1-indexed to differentiate from an empty 35 // field on disk 36 Active map[hash.Hash]int 37 Hist sql.Histogram 38 } 39 40 var _ sql.Statistic = (*DoltStats)(nil) 41 42 func (s *DoltStats) WithColSet(set sql.ColSet) sql.Statistic { 43 ret := *s 44 ret.Statistic = ret.Statistic.WithColSet(set).(*stats.Statistic) 45 return &ret 46 } 47 48 func (s *DoltStats) WithFuncDeps(set *sql.FuncDepSet) sql.Statistic { 49 ret := *s 50 ret.Statistic = ret.Statistic.WithFuncDeps(set).(*stats.Statistic) 51 return &ret 52 } 53 54 func (s *DoltStats) WithDistinctCount(u uint64) sql.Statistic { 55 ret := *s 56 ret.Statistic = ret.Statistic.WithDistinctCount(u).(*stats.Statistic) 57 return &ret 58 } 59 60 func (s *DoltStats) WithRowCount(u uint64) sql.Statistic { 61 ret := *s 62 ret.Statistic = ret.Statistic.WithRowCount(u).(*stats.Statistic) 63 return &ret 64 } 65 66 func (s *DoltStats) WithNullCount(u uint64) sql.Statistic { 67 ret := *s 68 ret.Statistic = ret.Statistic.WithNullCount(u).(*stats.Statistic) 69 return &ret 70 } 71 72 func (s *DoltStats) WithAvgSize(u uint64) sql.Statistic { 73 ret := *s 74 ret.Statistic = ret.Statistic.WithAvgSize(u).(*stats.Statistic) 75 return &ret 76 } 77 78 func (s *DoltStats) WithLowerBound(row sql.Row) sql.Statistic { 79 ret := *s 80 ret.Statistic = ret.Statistic.WithLowerBound(row).(*stats.Statistic) 81 return &ret 82 } 83 84 func (s *DoltStats) RowCount() uint64 { 85 return s.Statistic.RowCount() 86 } 87 88 func (s *DoltStats) DistinctCount() uint64 { 89 return s.Statistic.DistinctCount() 90 } 91 92 func (s *DoltStats) NullCount() uint64 { 93 return s.Statistic.NullCount() 94 95 } 96 97 func (s *DoltStats) AvgSize() uint64 { 98 return s.Statistic.AvgSize() 99 100 } 101 102 func (s *DoltStats) CreatedAt() time.Time { 103 return s.Statistic.CreatedAt() 104 105 } 106 107 func (s *DoltStats) Columns() []string { 108 return s.Statistic.Columns() 109 } 110 111 func (s *DoltStats) Types() []sql.Type { 112 return s.Statistic.Types() 113 } 114 115 func (s *DoltStats) Qualifier() sql.StatQualifier { 116 return s.Statistic.Qualifier() 117 } 118 119 func (s *DoltStats) IndexClass() sql.IndexClass { 120 return s.Statistic.IndexClass() 121 } 122 123 func (s *DoltStats) FuncDeps() *sql.FuncDepSet { 124 return s.Statistic.FuncDeps() 125 } 126 127 func (s *DoltStats) ColSet() sql.ColSet { 128 return s.Statistic.ColSet() 129 } 130 131 func (s *DoltStats) LowerBound() sql.Row { 132 return s.Statistic.LowerBound() 133 } 134 135 func NewDoltStats() *DoltStats { 136 return &DoltStats{mu: &sync.Mutex{}, Active: make(map[hash.Hash]int), Statistic: &stats.Statistic{}} 137 } 138 139 func (s *DoltStats) ToInterface() (interface{}, error) { 140 statVal, err := s.Statistic.ToInterface() 141 if err != nil { 142 return nil, err 143 } 144 ret := statVal.(map[string]interface{}) 145 146 var hist sql.Histogram 147 for _, b := range s.Hist { 148 hist = append(hist, b) 149 } 150 histVal, err := hist.ToInterface() 151 if err != nil { 152 return nil, err 153 } 154 ret["statistic"].(map[string]interface{})["buckets"] = histVal 155 return ret, nil 156 } 157 158 func (s *DoltStats) WithHistogram(h sql.Histogram) (sql.Statistic, error) { 159 ret := *s 160 ret.Hist = nil 161 for _, b := range h { 162 doltB, ok := b.(DoltBucket) 163 if !ok { 164 return nil, fmt.Errorf("invalid bucket type: %T", b) 165 } 166 ret.Hist = append(ret.Hist, doltB) 167 } 168 return &ret, nil 169 } 170 171 func (s *DoltStats) Histogram() sql.Histogram { 172 return s.Hist 173 } 174 175 func DoltStatsFromSql(stat sql.Statistic) (*DoltStats, error) { 176 hist, err := DoltHistFromSql(stat.Histogram(), stat.Types()) 177 if err != nil { 178 return nil, err 179 } 180 ret := &DoltStats{ 181 mu: &sync.Mutex{}, 182 Hist: hist, 183 Statistic: stats.NewStatistic(stat.RowCount(), stat.DistinctCount(), stat.NullCount(), stat.AvgSize(), stat.CreatedAt(), stat.Qualifier(), stat.Columns(), stat.Types(), nil, stat.IndexClass(), stat.LowerBound()), 184 Active: make(map[hash.Hash]int), 185 } 186 ret.Statistic.Fds = stat.FuncDeps() 187 ret.Statistic.Colset = stat.ColSet() 188 return ret, nil 189 } 190 191 func (s *DoltStats) UpdateActive() { 192 s.mu.Lock() 193 defer s.mu.Unlock() 194 newActive := make(map[hash.Hash]int) 195 for i, hash := range s.Chunks { 196 newActive[hash] = i 197 } 198 s.Active = newActive 199 } 200 201 type DoltHistogram []DoltBucket 202 203 type DoltBucket struct { 204 *stats.Bucket 205 Chunk hash.Hash 206 Created time.Time 207 } 208 209 func DoltBucketChunk(b sql.HistogramBucket) hash.Hash { 210 return b.(DoltBucket).Chunk 211 } 212 213 func DoltBucketCreated(b sql.HistogramBucket) time.Time { 214 return b.(DoltBucket).Created 215 } 216 217 var _ sql.HistogramBucket = (*DoltBucket)(nil) 218 219 func DoltHistFromSql(hist sql.Histogram, types []sql.Type) (sql.Histogram, error) { 220 ret := make(sql.Histogram, len(hist)) 221 var err error 222 for i, b := range hist { 223 upperBound := make(sql.Row, len(b.UpperBound())) 224 for i, v := range b.UpperBound() { 225 upperBound[i], _, err = types[i].Convert(v) 226 if err != nil { 227 return nil, fmt.Errorf("failed to convert %v to type %s", v, types[i].String()) 228 } 229 } 230 mcvs := make([]sql.Row, len(b.Mcvs())) 231 for i, mcv := range b.Mcvs() { 232 for _, v := range mcv { 233 conv, _, err := types[i].Convert(v) 234 if err != nil { 235 return nil, fmt.Errorf("failed to convert %v to type %s", v, types[i].String()) 236 } 237 mcvs[i] = append(mcvs[i], conv) 238 } 239 } 240 ret[i] = DoltBucket{ 241 Bucket: stats.NewHistogramBucket(b.RowCount(), b.DistinctCount(), b.NullCount(), b.BoundCount(), upperBound, b.McvCounts(), mcvs), 242 } 243 } 244 return ret, nil 245 }