github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/sqle/statspro/analyze.go

// Copyright 2024 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package statspro

import (
	"fmt"
	"strings"

	"github.com/dolthub/go-mysql-server/sql"

	"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
	"github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable"
	"github.com/dolthub/dolt/go/libraries/doltcore/env"
	"github.com/dolthub/dolt/go/libraries/doltcore/sqle"
	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
	"github.com/dolthub/dolt/go/store/hash"
	"github.com/dolthub/dolt/go/store/prolly/tree"
)

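// RefreshTableStats rebuilds the histogram statistics for every index on
// |table| in |db|, using the session's current branch. It initializes a stats
// database for |db| if none exists yet, reuses buckets whose backing chunks
// are unchanged since the last refresh, merges them with freshly built
// buckets, and flushes the result.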
func (p *Provider) RefreshTableStats(ctx *sql.Context, table sql.Table, db string) error {
	dSess := dsess.DSessFromSess(ctx.Session)
	branch, err := dSess.GetBranch()
	if err != nil {
		return err
	}

	sqlDb, err := dSess.Provider().Database(ctx, p.branchQualifiedDatabase(db, branch))
	if err != nil {
		return err
	}

	// lock only after accessing DatabaseProvider
	p.mu.Lock()
	defer p.mu.Unlock()

	tableName := strings.ToLower(table.Name())
	dbName := strings.ToLower(db)

	iat, ok := table.(sql.IndexAddressableTable)
	if !ok {
		return nil
	}
	indexes, err := iat.GetIndexes(ctx)
	if err != nil {
		return err
	}

	// it's important to update WORKING session references every call
	sqlTable, dTab, err := GetLatestTable(ctx, tableName, sqlDb)
	if err != nil {
		return err
	}

	statDb, ok := p.getStatDb(dbName)
	if !ok {
		// if the stats database does not exist, initialize one
		fs, err := p.pro.FileSystemForDatabase(dbName)
		if err != nil {
			return err
		}
		sourceDb, ok := p.pro.BaseDatabase(ctx, dbName)
		if !ok {
			return sql.ErrDatabaseNotFound.New(dbName)
		}
		statDb, err = p.sf.Init(ctx, sourceDb, p.pro, fs, env.GetCurrentUserHomeDir)
		if err != nil {
			ctx.Warn(0, err.Error())
			return nil
		}
		p.setStatDb(dbName, statDb)
	}

	tablePrefix := fmt.Sprintf("%s.", tableName)
	var idxMetas []indexMeta
	for _, idx := range indexes {
		cols := make([]string, len(idx.Expressions()))
		for i, c := range idx.Expressions() {
			cols[i] = strings.TrimPrefix(strings.ToLower(c), tablePrefix)
		}

		qual := sql.NewStatQualifier(db, table.Name(), strings.ToLower(idx.ID()))
		curStat, ok := statDb.GetStat(branch, qual)
		if !ok {
			curStat = NewDoltStats()
			curStat.Statistic.Qual = qual
		}
		idxMeta, err := newIdxMeta(ctx, curStat, dTab, idx, cols)
		if err != nil {
			return err
		}
		idxMetas = append(idxMetas, idxMeta)
	}

	newTableStats, err := createNewStatsBuckets(ctx, sqlTable, dTab, indexes, idxMetas)
	if err != nil {
		return err
	}

	// merge new chunks with preexisting chunks
	for _, idxMeta := range idxMetas {
		stat := newTableStats[idxMeta.qual]
		targetChunks, err := MergeNewChunks(idxMeta.allAddrs, idxMeta.keepChunks, stat.Hist)
		if err != nil {
			return err
		}
		if targetChunks == nil {
			// empty table
			continue
		}
		stat.Chunks = idxMeta.allAddrs
		stat.Hist = targetChunks
		stat.UpdateActive()
		if err := statDb.SetStat(ctx, branch, idxMeta.qual, stat); err != nil {
			return err
		}
	}

	p.UpdateStatus(dbName, fmt.Sprintf("refreshed %s", dbName))
	return statDb.Flush(ctx, branch)
}

// branchQualifiedDatabase returns a branch qualified database. If the database
// is already branch suffixed no duplication is applied.
func (p *Provider) branchQualifiedDatabase(db, branch string) string {
	suffix := fmt.Sprintf("/%s", branch)
	if !strings.HasSuffix(db, suffix) {
		return fmt.Sprintf("%s%s", db, suffix)
	}
	return db
}

// GetLatestTable will get the WORKING root table for the current database/branch
func GetLatestTable(ctx *sql.Context, tableName string, sqlDb sql.Database) (sql.Table, *doltdb.Table, error) {
	sqlTable, ok, err := sqlDb.(sqle.Database).GetTableInsensitive(ctx, tableName)
	if err != nil {
		return nil, nil, err
	}
	if !ok {
		return nil, nil, fmt.Errorf("statistics refresh error: table not found %s", tableName)
	}

	var dTab *doltdb.Table
	switch t := sqlTable.(type) {
	case *sqle.AlterableDoltTable:
		dTab, err = t.DoltTable.DoltTable(ctx)
	case *sqle.WritableDoltTable:
		dTab, err = t.DoltTable.DoltTable(ctx)
	case *sqle.DoltTable:
		dTab, err = t.DoltTable(ctx)
	default:
		err = fmt.Errorf("failed to unwrap dolt table from type: %T", sqlTable)
	}
	if err != nil {
		return nil, nil, err
	}
	return sqlTable, dTab, nil
}

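// newIdxMeta compares the index's current tree chunks at the histogram target
// level against the buckets recorded in |curStats|: chunks that still have a
// matching bucket are kept, chunks without one are queued for scanning along
// with their ordinal ranges, and stale buckets whose chunks no longer exist
// are marked for removal.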
func newIdxMeta(ctx *sql.Context, curStats *DoltStats, doltTable *doltdb.Table, sqlIndex sql.Index, cols []string) (indexMeta, error) {
	var idx durable.Index
	var err error
	if strings.EqualFold(sqlIndex.ID(), "PRIMARY") {
		idx, err = doltTable.GetRowData(ctx)
	} else {
		idx, err = doltTable.GetIndexRowData(ctx, sqlIndex.ID())
	}
	if err != nil {
		return indexMeta{}, err
	}

	prollyMap := durable.ProllyMapFromIndex(idx)

	if cnt, err := prollyMap.Count(); err != nil {
		return indexMeta{}, err
	} else if cnt == 0 {
		return indexMeta{
			qual: curStats.Statistic.Qual,
			cols: cols,
		}, nil
	}

	// get newest histogram target level hashes
	levelNodes, err := tree.GetHistogramLevel(ctx, prollyMap.Tuples(), bucketLowCnt)
	if err != nil {
		return indexMeta{}, err
	}

	var addrs []hash.Hash
	var keepChunks []sql.HistogramBucket
	var missingAddrs float64
	var missingChunks []tree.Node
	var missingOffsets []updateOrdinal
	var offset uint64

	for _, n := range levelNodes {
		// Compare the previous histogram chunks to the newest tree chunks.
		// Partition the newest chunks into 1) preserved or 2) missing.
		// Missing chunks will need to be scanned on a stats update, so
		// track the (start, end) ordinal offsets to simplify the read iter.
		treeCnt, err := n.TreeCount()
		if err != nil {
			return indexMeta{}, err
		}

		addrs = append(addrs, n.HashOf())
		if bucketIdx, ok := curStats.Active[n.HashOf()]; !ok {
			missingChunks = append(missingChunks, n)
			missingOffsets = append(missingOffsets, updateOrdinal{offset, offset + uint64(treeCnt)})
			missingAddrs++
		} else {
			keepChunks = append(keepChunks, curStats.Hist[bucketIdx])
		}
		offset += uint64(treeCnt)
	}

	var dropChunks []sql.HistogramBucket
	for _, h := range curStats.Chunks {
		var match bool
		for _, b := range keepChunks {
			if DoltBucketChunk(b) == h {
				match = true
				break
			}
		}
		if !match {
			dropChunks = append(dropChunks, curStats.Hist[curStats.Active[h]])
		}
	}

	return indexMeta{
		qual:           curStats.Statistic.Qual,
		cols:           cols,
		newNodes:       missingChunks,
		updateOrdinals: missingOffsets,
		keepChunks:     keepChunks,
		dropChunks:     dropChunks,
		allAddrs:       addrs,
	}, nil
}
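
// refreshExample is an illustrative sketch and not part of the original file.
// It shows how a caller that already holds a *sql.Context, a resolved
// sql.Table, and the database name might trigger a refresh; obtaining the
// context and table is integration-specific and assumed here.
func refreshExample(ctx *sql.Context, p *Provider, t sql.Table, dbName string) error {
	// RefreshTableStats diffs the table's current index chunks against the
	// stored histogram buckets and rescans only ranges that changed, so
	// repeated calls on an unchanged table are comparatively cheap.
	return p.RefreshTableStats(ctx, t, dbName)
}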