github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/sqle/statsnoms/load.go (about) 1 // Copyright 2024 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package statsnoms 16 17 import ( 18 "errors" 19 "fmt" 20 "io" 21 "strconv" 22 "strings" 23 "time" 24 25 "github.com/dolthub/go-mysql-server/sql" 26 "github.com/dolthub/go-mysql-server/sql/stats" 27 28 "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" 29 "github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable" 30 "github.com/dolthub/dolt/go/libraries/doltcore/schema" 31 "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" 32 "github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro" 33 "github.com/dolthub/dolt/go/store/hash" 34 "github.com/dolthub/dolt/go/store/prolly" 35 "github.com/dolthub/dolt/go/store/prolly/tree" 36 "github.com/dolthub/dolt/go/store/val" 37 ) 38 39 func loadStats(ctx *sql.Context, db dsess.SqlDatabase, m prolly.Map) (map[sql.StatQualifier]*statspro.DoltStats, error) { 40 qualToStats := make(map[sql.StatQualifier]*statspro.DoltStats) 41 42 iter, err := NewStatsIter(ctx, m) 43 if err != nil { 44 return nil, err 45 } 46 currentStat := statspro.NewDoltStats() 47 var lowerBound sql.Row 48 for { 49 row, err := iter.Next(ctx) 50 if errors.Is(err, io.EOF) { 51 break 52 } else if err != nil { 53 return nil, err 54 } 55 56 // deserialize K, V 57 dbName := row[schema.StatsDbTag].(string) 58 tableName := row[schema.StatsTableTag].(string) 59 indexName := row[schema.StatsIndexTag].(string) 60 _ = row[schema.StatsVersionTag] 61 commit := hash.Parse(row[schema.StatsCommitHashTag].(string)) 62 rowCount := row[schema.StatsRowCountTag].(uint64) 63 distinctCount := row[schema.StatsDistinctCountTag].(uint64) 64 nullCount := row[schema.StatsNullCountTag].(uint64) 65 columns := strings.Split(row[schema.StatsColumnsTag].(string), ",") 66 typesStr := row[schema.StatsTypesTag].(string) 67 boundRowStr := row[schema.StatsUpperBoundTag].(string) 68 upperBoundCnt := row[schema.StatsUpperBoundCntTag].(uint64) 69 createdAt := row[schema.StatsCreatedAtTag].(time.Time) 70 71 typs := strings.Split(typesStr, ",") 72 for i, t := range typs { 73 typs[i] = strings.TrimSpace(t) 74 } 75 76 numMcvs := schema.StatsMcvCountsTag - schema.StatsMcv1Tag 77 78 mcvCountsStr := strings.Split(row[schema.StatsMcvCountsTag].(string), ",") 79 mcvCnts := make([]uint64, numMcvs) 80 for i, v := range mcvCountsStr { 81 val, err := strconv.Atoi(v) 82 if err != nil { 83 return nil, err 84 } 85 mcvCnts[i] = uint64(val) 86 } 87 88 mcvs := make([]sql.Row, numMcvs) 89 for i, v := range row[schema.StatsMcv1Tag:schema.StatsMcvCountsTag] { 90 if v != nil { 91 row, err := iter.ParseRow(v.(string)) 92 if err != nil { 93 return nil, err 94 } 95 mcvs[i] = row 96 } 97 } 98 99 for i, v := range mcvCnts { 100 if v == 0 { 101 mcvs = mcvs[:i] 102 mcvCnts = mcvCnts[:i] 103 break 104 } 105 } 106 107 boundRow, err := iter.ParseRow(boundRowStr) 108 if err != nil { 109 return nil, err 110 } 111 112 qual := sql.NewStatQualifier(dbName, tableName, indexName) 113 if currentStat.Statistic.Qual.String() != qual.String() { 114 if !currentStat.Statistic.Qual.Empty() { 115 currentStat.Statistic.LowerBnd, err = loadLowerBound(ctx, currentStat.Statistic.Qual) 116 if err != nil { 117 return nil, err 118 } 119 fds, colSet, err := loadFuncDeps(ctx, db, currentStat.Statistic.Qual) 120 if err != nil { 121 return nil, err 122 } 123 currentStat.Statistic.Fds = fds 124 currentStat.Statistic.Colset = colSet 125 currentStat.UpdateActive() 126 qualToStats[currentStat.Statistic.Qual] = currentStat 127 } 128 129 currentStat = statspro.NewDoltStats() 130 currentStat.Statistic.Qual = qual 131 currentStat.Statistic.Cols = columns 132 currentStat.Statistic.LowerBnd = lowerBound 133 } 134 135 if currentStat.Statistic.Hist == nil { 136 currentStat.Statistic.Typs, err = stats.ParseTypeStrings(typs) 137 if err != nil { 138 return nil, err 139 } 140 currentStat.Statistic.Qual = qual 141 } 142 143 bucket := statspro.DoltBucket{ 144 Chunk: commit, 145 Created: createdAt, 146 Bucket: &stats.Bucket{ 147 RowCnt: uint64(rowCount), 148 DistinctCnt: uint64(distinctCount), 149 NullCnt: uint64(nullCount), 150 McvVals: mcvs, 151 McvsCnt: mcvCnts, 152 BoundCnt: upperBoundCnt, 153 BoundVal: boundRow, 154 }, 155 } 156 157 currentStat.Hist = append(currentStat.Hist, bucket) 158 currentStat.Statistic.RowCnt += uint64(rowCount) 159 currentStat.Statistic.DistinctCnt += uint64(distinctCount) 160 currentStat.Statistic.NullCnt += uint64(rowCount) 161 if currentStat.Statistic.Created.Before(createdAt) { 162 currentStat.Statistic.Created = createdAt 163 } 164 } 165 currentStat.Statistic.LowerBnd, err = loadLowerBound(ctx, currentStat.Statistic.Qual) 166 if err != nil { 167 return nil, err 168 } 169 fds, colSet, err := loadFuncDeps(ctx, db, currentStat.Statistic.Qual) 170 if err != nil { 171 return nil, err 172 } 173 currentStat.Statistic.Fds = fds 174 currentStat.Statistic.Colset = colSet 175 currentStat.UpdateActive() 176 qualToStats[currentStat.Statistic.Qual] = currentStat 177 return qualToStats, nil 178 } 179 180 func loadLowerBound(ctx *sql.Context, qual sql.StatQualifier) (sql.Row, error) { 181 dSess := dsess.DSessFromSess(ctx.Session) 182 roots, ok := dSess.GetRoots(ctx, qual.Db()) 183 if !ok { 184 return nil, nil 185 } 186 187 table, ok, err := roots.Head.GetTable(ctx, doltdb.TableName{Name: qual.Table()}) 188 if !ok { 189 return nil, nil 190 } 191 if err != nil { 192 return nil, err 193 } 194 idx, err := table.GetIndexRowData(ctx, qual.Index()) 195 if err != nil { 196 return nil, err 197 } 198 199 prollyMap := durable.ProllyMapFromIndex(idx) 200 keyBuilder := val.NewTupleBuilder(prollyMap.KeyDesc()) 201 buffPool := prollyMap.NodeStore().Pool() 202 203 firstIter, err := prollyMap.IterOrdinalRange(ctx, 0, 1) 204 if err != nil { 205 return nil, err 206 } 207 keyBytes, _, err := firstIter.Next(ctx) 208 if err != nil { 209 return nil, err 210 } 211 for i := range keyBuilder.Desc.Types { 212 keyBuilder.PutRaw(i, keyBytes.GetField(i)) 213 } 214 215 firstKey := keyBuilder.Build(buffPool) 216 var firstRow sql.Row 217 for i := 0; i < keyBuilder.Desc.Count(); i++ { 218 firstRow[i], err = tree.GetField(ctx, prollyMap.KeyDesc(), i, firstKey, prollyMap.NodeStore()) 219 if err != nil { 220 return nil, err 221 } 222 } 223 return firstRow, nil 224 } 225 226 func loadFuncDeps(ctx *sql.Context, db dsess.SqlDatabase, qual sql.StatQualifier) (*sql.FuncDepSet, sql.ColSet, error) { 227 tab, ok, err := db.GetTableInsensitive(ctx, qual.Table()) 228 if err != nil { 229 return nil, sql.ColSet{}, err 230 } else if !ok { 231 return nil, sql.ColSet{}, fmt.Errorf("%w: table not found: '%s'", statspro.ErrFailedToLoad, qual.Table()) 232 } 233 234 iat, ok := tab.(sql.IndexAddressable) 235 if !ok { 236 return nil, sql.ColSet{}, fmt.Errorf("%w: table does not have indexes: '%s'", statspro.ErrFailedToLoad, qual.Table()) 237 } 238 239 indexes, err := iat.GetIndexes(ctx) 240 if err != nil { 241 return nil, sql.ColSet{}, err 242 } 243 244 var idx sql.Index 245 for _, i := range indexes { 246 if strings.EqualFold(i.ID(), qual.Index()) { 247 idx = i 248 break 249 } 250 } 251 252 if idx == nil { 253 return nil, sql.ColSet{}, fmt.Errorf("%w: index not found: '%s'", statspro.ErrFailedToLoad, qual.Index()) 254 } 255 256 return stats.IndexFds(qual.Table(), tab.Schema(), idx) 257 }