github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/sqle/statspro/auto_refresh.go (about) 1 // Copyright 2024 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package statspro 16 17 import ( 18 "context" 19 "fmt" 20 "strings" 21 "time" 22 23 "github.com/dolthub/go-mysql-server/sql" 24 types2 "github.com/dolthub/go-mysql-server/sql/types" 25 26 "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" 27 ) 28 29 const asyncAutoRefreshStats = "async_auto_refresh_stats" 30 31 func (p *Provider) InitAutoRefresh(ctxFactory func(ctx context.Context) (*sql.Context, error), dbName string, bThreads *sql.BackgroundThreads) error { 32 _, threshold, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsAutoRefreshThreshold) 33 _, interval, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsAutoRefreshInterval) 34 interval64, _, _ := types2.Int64.Convert(interval) 35 intervalSec := time.Second * time.Duration(interval64.(int64)) 36 thresholdf64 := threshold.(float64) 37 38 ctx, err := ctxFactory(context.Background()) 39 if err != nil { 40 return err 41 } 42 43 branches := p.getStatsBranches(ctx) 44 45 return p.InitAutoRefreshWithParams(ctxFactory, dbName, bThreads, intervalSec, thresholdf64, branches) 46 } 47 48 func (p *Provider) InitAutoRefreshWithParams(ctxFactory func(ctx context.Context) (*sql.Context, error), dbName string, bThreads *sql.BackgroundThreads, checkInterval time.Duration, updateThresh float64, branches []string) error { 49 // this is only called after initial statistics are finished loading 50 // launch a thread that periodically checks freshness 51 52 p.mu.Lock() 53 defer p.mu.Unlock() 54 55 dropDbCtx, dbStatsCancel := context.WithCancel(context.Background()) 56 p.cancelers[dbName] = dbStatsCancel 57 58 return bThreads.Add(fmt.Sprintf("%s_%s", asyncAutoRefreshStats, dbName), func(ctx context.Context) { 59 ticker := time.NewTicker(checkInterval + time.Nanosecond) 60 for { 61 select { 62 case <-ctx.Done(): 63 ticker.Stop() 64 return 65 case <-ticker.C: 66 select { 67 case <-dropDbCtx.Done(): 68 ticker.Stop() 69 return 70 default: 71 } 72 73 sqlCtx, err := ctxFactory(ctx) 74 if err != nil { 75 return 76 } 77 78 dSess := dsess.DSessFromSess(sqlCtx.Session) 79 ddb, ok := dSess.GetDoltDB(sqlCtx, dbName) 80 if !ok { 81 sqlCtx.GetLogger().Debugf("statistics refresh error: database not found %s", dbName) 82 return 83 } 84 for _, branch := range branches { 85 if br, ok, err := ddb.HasBranch(ctx, branch); ok { 86 sqlCtx.GetLogger().Debugf("starting statistics refresh check for '%s': %s", dbName, time.Now().String()) 87 // update WORKING session references 88 sqlDb, err := dSess.Provider().Database(sqlCtx, p.branchQualifiedDatabase(dbName, branch)) 89 if err != nil { 90 sqlCtx.GetLogger().Debugf("statistics refresh error: %s", err.Error()) 91 return 92 } 93 94 if err := p.checkRefresh(sqlCtx, sqlDb, dbName, br, updateThresh); err != nil { 95 sqlCtx.GetLogger().Debugf("statistics refresh error: %s", err.Error()) 96 return 97 } 98 } else if err != nil { 99 sqlCtx.GetLogger().Debugf("statistics refresh error: branch check error %s", err.Error()) 100 } else { 101 sqlCtx.GetLogger().Debugf("statistics refresh error: branch not found %s", br) 102 } 103 } 104 } 105 } 106 }) 107 } 108 109 func (p *Provider) checkRefresh(ctx *sql.Context, sqlDb sql.Database, dbName, branch string, updateThresh float64) error { 110 p.mu.Lock() 111 defer p.mu.Unlock() 112 113 // Iterate all dbs, tables, indexes. Each db will collect 114 // []indexMeta above refresh threshold. We read and process those 115 // chunks' statistics. We merge updated chunks with precomputed 116 // chunks. The full set of statistics for each database lands 117 // 1) in the provider's most recent set of database statistics, and 118 // 2) on disk in the database's statistics ref'd prolly.Map. 119 statDb, ok := p.getStatDb(dbName) 120 if !ok { 121 return sql.ErrDatabaseNotFound.New(dbName) 122 } 123 124 var deletedStats []sql.StatQualifier 125 qualExists := make(map[sql.StatQualifier]bool) 126 tableExistsAndSkipped := make(map[string]bool) 127 128 tables, err := sqlDb.GetTableNames(ctx) 129 if err != nil { 130 return err 131 } 132 133 for _, table := range tables { 134 sqlTable, dTab, err := GetLatestTable(ctx, table, sqlDb) 135 if err != nil { 136 return err 137 } 138 139 tableHash, err := dTab.GetRowDataHash(ctx) 140 if err != nil { 141 return err 142 } 143 144 if statDb.GetLatestHash(branch, table) == tableHash { 145 // no data changes since last check 146 tableExistsAndSkipped[table] = true 147 ctx.GetLogger().Debugf("statistics refresh: table hash unchanged since last check: %s", tableHash) 148 continue 149 } else { 150 ctx.GetLogger().Debugf("statistics refresh: new table hash: %s", tableHash) 151 } 152 153 iat, ok := sqlTable.(sql.IndexAddressableTable) 154 if !ok { 155 return fmt.Errorf("table does not support indexes %s", table) 156 } 157 158 indexes, err := iat.GetIndexes(ctx) 159 if err != nil { 160 return err 161 } 162 163 // collect indexes and ranges to be updated 164 var idxMetas []indexMeta 165 for _, index := range indexes { 166 qual := sql.NewStatQualifier(dbName, table, strings.ToLower(index.ID())) 167 qualExists[qual] = true 168 curStat, ok := statDb.GetStat(branch, qual) 169 if !ok { 170 curStat = NewDoltStats() 171 curStat.Statistic.Qual = qual 172 173 cols := make([]string, len(index.Expressions())) 174 tablePrefix := fmt.Sprintf("%s.", table) 175 for i, c := range index.Expressions() { 176 cols[i] = strings.TrimPrefix(strings.ToLower(c), tablePrefix) 177 } 178 curStat.Statistic.Cols = cols 179 } 180 ctx.GetLogger().Debugf("statistics refresh index: %s", qual.String()) 181 182 updateMeta, err := newIdxMeta(ctx, curStat, dTab, index, curStat.Columns()) 183 if err != nil { 184 ctx.GetLogger().Debugf("statistics refresh error: %s", err.Error()) 185 continue 186 } 187 curCnt := float64(len(curStat.Active)) 188 updateCnt := float64(len(updateMeta.newNodes)) 189 deleteCnt := float64(len(curStat.Active) - len(updateMeta.keepChunks)) 190 ctx.GetLogger().Debugf("statistics current: %d, new: %d, delete: %d", int(curCnt), int(updateCnt), int(deleteCnt)) 191 192 if curCnt == 0 || (deleteCnt+updateCnt)/curCnt > updateThresh { 193 if curCnt == 0 && updateCnt == 0 { 194 continue 195 } 196 ctx.GetLogger().Debugf("statistics updating: %s", updateMeta.qual) 197 // mark index for updating 198 idxMetas = append(idxMetas, updateMeta) 199 // update lastest hash if we haven't already 200 statDb.SetLatestHash(branch, table, tableHash) 201 } 202 } 203 204 // get new buckets for index chunks to update 205 newTableStats, err := createNewStatsBuckets(ctx, sqlTable, dTab, indexes, idxMetas) 206 if err != nil { 207 return err 208 } 209 210 // merge new chunks with preexisting chunks 211 for _, updateMeta := range idxMetas { 212 stat := newTableStats[updateMeta.qual] 213 if stat != nil { 214 var err error 215 if _, ok := statDb.GetStat(branch, updateMeta.qual); !ok { 216 err = statDb.SetStat(ctx, branch, updateMeta.qual, stat) 217 } else { 218 err = statDb.ReplaceChunks(ctx, branch, updateMeta.qual, updateMeta.allAddrs, updateMeta.dropChunks, stat.Hist) 219 } 220 if err != nil { 221 return err 222 } 223 p.UpdateStatus(dbName, fmt.Sprintf("refreshed %s", dbName)) 224 } 225 } 226 } 227 228 for _, q := range statDb.ListStatQuals(branch) { 229 // table or index delete leaves hole in stats 230 // this is separate from threshold check 231 if !tableExistsAndSkipped[q.Table()] && !qualExists[q] { 232 // only delete stats we've verified are deleted 233 deletedStats = append(deletedStats, q) 234 } 235 } 236 237 statDb.DeleteStats(branch, deletedStats...) 238 239 if err := statDb.Flush(ctx, branch); err != nil { 240 return err 241 } 242 243 return nil 244 }