github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/sqle/statsnoms/database.go (about) 1 // Copyright 2024 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package statsnoms 16 17 import ( 18 "context" 19 "errors" 20 "fmt" 21 "path" 22 "strings" 23 "sync" 24 25 "github.com/dolthub/go-mysql-server/sql" 26 27 "github.com/dolthub/dolt/go/libraries/doltcore/dbfactory" 28 "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" 29 "github.com/dolthub/dolt/go/libraries/doltcore/env" 30 "github.com/dolthub/dolt/go/libraries/doltcore/schema" 31 "github.com/dolthub/dolt/go/libraries/doltcore/sqle" 32 "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" 33 "github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro" 34 "github.com/dolthub/dolt/go/libraries/doltcore/table/editor" 35 "github.com/dolthub/dolt/go/libraries/utils/earl" 36 "github.com/dolthub/dolt/go/libraries/utils/filesys" 37 "github.com/dolthub/dolt/go/store/hash" 38 "github.com/dolthub/dolt/go/store/prolly" 39 "github.com/dolthub/dolt/go/store/types" 40 ) 41 42 func NewNomsStatsFactory(dialPro dbfactory.GRPCDialProvider) *NomsStatsFactory { 43 return &NomsStatsFactory{dialPro: dialPro} 44 } 45 46 type NomsStatsFactory struct { 47 dialPro dbfactory.GRPCDialProvider 48 } 49 50 var _ statspro.StatsFactory = NomsStatsFactory{} 51 52 func (sf NomsStatsFactory) Init(ctx *sql.Context, sourceDb dsess.SqlDatabase, prov *sqle.DoltDatabaseProvider, fs filesys.Filesys, hdp env.HomeDirProvider) (statspro.Database, error) { 53 params := make(map[string]interface{}) 54 params[dbfactory.GRPCDialProviderParam] = sf.dialPro 55 56 var urlPath string 57 u, err := earl.Parse(prov.DbFactoryUrl()) 58 if u.Scheme == dbfactory.MemScheme { 59 urlPath = path.Join(prov.DbFactoryUrl(), dbfactory.DoltDataDir) 60 } else if u.Scheme == dbfactory.FileScheme { 61 urlPath = doltdb.LocalDirDoltDB 62 } 63 64 statsFs, err := fs.WithWorkingDir(dbfactory.DoltStatsDir) 65 if err != nil { 66 return nil, err 67 } 68 69 var dEnv *env.DoltEnv 70 exists, isDir := statsFs.Exists("") 71 if !exists { 72 err := statsFs.MkDirs("") 73 if err != nil { 74 return nil, fmt.Errorf("unable to make directory '%s', cause: %s", dbfactory.DoltStatsDir, err.Error()) 75 } 76 77 dEnv = env.Load(context.Background(), hdp, statsFs, urlPath, "test") 78 sess := dsess.DSessFromSess(ctx.Session) 79 err = dEnv.InitRepo(ctx, types.Format_Default, sess.Username(), sess.Email(), prov.DefaultBranch()) 80 if err != nil { 81 return nil, err 82 } 83 } else if !isDir { 84 return nil, fmt.Errorf("file exists where the dolt stats directory should be") 85 } else { 86 dEnv = env.LoadWithoutDB(ctx, hdp, statsFs, "") 87 } 88 89 ddb, err := doltdb.LoadDoltDBWithParams(ctx, types.Format_Default, urlPath, statsFs, params) 90 if err != nil { 91 return nil, err 92 } 93 94 dEnv.DoltDB = ddb 95 96 deaf := dEnv.DbEaFactory() 97 98 tmpDir, err := dEnv.TempTableFilesDir() 99 if err != nil { 100 return nil, err 101 } 102 opts := editor.Options{ 103 Deaf: deaf, 104 Tempdir: tmpDir, 105 } 106 statsDb, err := sqle.NewDatabase(ctx, "stats", dEnv.DbData(), opts) 107 if err != nil { 108 return nil, err 109 } 110 return NewNomsStats(sourceDb, statsDb), nil 111 } 112 113 func NewNomsStats(sourceDb, statsDb dsess.SqlDatabase) *NomsStatsDatabase { 114 return &NomsStatsDatabase{mu: &sync.Mutex{}, destDb: statsDb, sourceDb: sourceDb} 115 } 116 117 type dbStats map[sql.StatQualifier]*statspro.DoltStats 118 119 type NomsStatsDatabase struct { 120 mu *sync.Mutex 121 destDb dsess.SqlDatabase 122 sourceDb dsess.SqlDatabase 123 stats []dbStats 124 branches []string 125 latestTableRoots []map[string]hash.Hash 126 dirty []*prolly.MutableMap 127 } 128 129 var _ statspro.Database = (*NomsStatsDatabase)(nil) 130 131 func (n *NomsStatsDatabase) Close() error { 132 return n.destDb.DbData().Ddb.Close() 133 } 134 135 func (n *NomsStatsDatabase) LoadBranchStats(ctx *sql.Context, branch string) error { 136 statsMap, err := n.destDb.DbData().Ddb.GetStatistics(ctx, branch) 137 if errors.Is(err, doltdb.ErrNoStatistics) { 138 return nil 139 } else if err != nil { 140 return err 141 } 142 doltStats, err := loadStats(ctx, n.sourceDb, statsMap) 143 if err != nil { 144 return err 145 } 146 n.branches = append(n.branches, branch) 147 n.stats = append(n.stats, doltStats) 148 n.dirty = append(n.dirty, nil) 149 n.latestTableRoots = append(n.latestTableRoots, make(map[string]hash.Hash)) 150 return nil 151 } 152 153 func (n *NomsStatsDatabase) getBranchStats(branch string) dbStats { 154 for i, b := range n.branches { 155 if strings.EqualFold(b, branch) { 156 return n.stats[i] 157 } 158 } 159 return nil 160 } 161 162 func (n *NomsStatsDatabase) GetStat(branch string, qual sql.StatQualifier) (*statspro.DoltStats, bool) { 163 stats := n.getBranchStats(branch) 164 ret, ok := stats[qual] 165 return ret, ok 166 } 167 168 func (n *NomsStatsDatabase) ListStatQuals(branch string) []sql.StatQualifier { 169 stats := n.getBranchStats(branch) 170 var ret []sql.StatQualifier 171 for qual, _ := range stats { 172 ret = append(ret, qual) 173 } 174 return ret 175 } 176 177 func (n *NomsStatsDatabase) SetStat(ctx context.Context, branch string, qual sql.StatQualifier, stats *statspro.DoltStats) error { 178 var statsMap *prolly.MutableMap 179 for i, b := range n.branches { 180 if strings.EqualFold(branch, b) { 181 n.stats[i][qual] = stats 182 if n.dirty[i] == nil { 183 n.initMutable(ctx, i) 184 } 185 statsMap = n.dirty[i] 186 } 187 } 188 if statsMap == nil { 189 if err := n.trackBranch(ctx, branch); err != nil { 190 return err 191 } 192 statsMap = n.dirty[len(n.branches)-1] 193 n.stats[len(n.branches)-1][qual] = stats 194 } 195 196 return n.replaceStats(ctx, statsMap, stats) 197 } 198 199 func (n *NomsStatsDatabase) trackBranch(ctx context.Context, branch string) error { 200 n.branches = append(n.branches, branch) 201 n.stats = append(n.stats, make(dbStats)) 202 n.latestTableRoots = append(n.latestTableRoots, make(map[string]hash.Hash)) 203 204 kd, vd := schema.StatsTableDoltSchema.GetMapDescriptors() 205 newMap, err := prolly.NewMapFromTuples(ctx, n.destDb.DbData().Ddb.NodeStore(), kd, vd) 206 if err != nil { 207 return err 208 } 209 n.dirty = append(n.dirty, newMap.Mutate()) 210 return n.destDb.DbData().Ddb.SetStatisics(ctx, branch, newMap.HashOf()) 211 } 212 213 func (n *NomsStatsDatabase) initMutable(ctx context.Context, i int) error { 214 statsMap, err := n.destDb.DbData().Ddb.GetStatistics(ctx, n.branches[i]) 215 if err != nil { 216 return err 217 } 218 n.dirty[i] = statsMap.Mutate() 219 return nil 220 } 221 222 func (n *NomsStatsDatabase) DeleteStats(branch string, quals ...sql.StatQualifier) { 223 for i, b := range n.branches { 224 if strings.EqualFold(b, branch) { 225 for _, qual := range quals { 226 delete(n.stats[i], qual) 227 } 228 } 229 } 230 } 231 232 func (n *NomsStatsDatabase) DeleteBranchStats(ctx context.Context, branch string, flush bool) error { 233 for i, b := range n.branches { 234 if strings.EqualFold(b, branch) { 235 n.branches = append(n.branches[:i], n.branches[i+1:]...) 236 n.dirty = append(n.dirty[:i], n.dirty[i+1:]...) 237 n.stats = append(n.stats[:i], n.stats[i+1:]...) 238 n.latestTableRoots = append(n.latestTableRoots[:i], n.latestTableRoots[i+1:]...) 239 } 240 } 241 if flush { 242 return n.destDb.DbData().Ddb.DropStatisics(ctx, branch) 243 } 244 return nil 245 } 246 247 func (n *NomsStatsDatabase) ReplaceChunks(ctx context.Context, branch string, qual sql.StatQualifier, targetHashes []hash.Hash, dropChunks, newChunks []sql.HistogramBucket) error { 248 var dbStat dbStats 249 for i, b := range n.branches { 250 if strings.EqualFold(b, branch) { 251 // naive merge the new with old 252 dbStat = n.stats[i] 253 } 254 } 255 256 if dbStat == nil { 257 if err := n.trackBranch(ctx, branch); err != nil { 258 return err 259 } 260 dbStat = n.stats[len(n.branches)-1] 261 } 262 263 if _, ok := dbStat[qual]; ok { 264 oldChunks := dbStat[qual].Hist 265 targetBuckets, err := statspro.MergeNewChunks(targetHashes, oldChunks, newChunks) 266 if err != nil { 267 return err 268 } 269 dbStat[qual].Hist = targetBuckets 270 } else { 271 dbStat[qual] = statspro.NewDoltStats() 272 } 273 dbStat[qual].Chunks = targetHashes 274 dbStat[qual].UpdateActive() 275 276 // let |n.SetStats| update memory and disk 277 return n.SetStat(ctx, branch, qual, dbStat[qual]) 278 } 279 280 func (n *NomsStatsDatabase) Flush(ctx context.Context, branch string) error { 281 for i, b := range n.branches { 282 if strings.EqualFold(b, branch) { 283 if n.dirty[i] != nil { 284 flushedMap, err := n.dirty[i].Map(ctx) 285 if err != nil { 286 return err 287 } 288 n.dirty[i] = nil 289 n.destDb.DbData().Ddb.SetStatisics(ctx, branch, flushedMap.HashOf()) 290 return nil 291 } 292 } 293 } 294 return nil 295 } 296 297 func (n *NomsStatsDatabase) GetLatestHash(branch, tableName string) hash.Hash { 298 n.mu.Lock() 299 defer n.mu.Unlock() 300 for i, b := range n.branches { 301 if strings.EqualFold(branch, b) { 302 return n.latestTableRoots[i][tableName] 303 } 304 } 305 return hash.Hash{} 306 } 307 308 func (n *NomsStatsDatabase) SetLatestHash(branch, tableName string, h hash.Hash) { 309 n.mu.Lock() 310 defer n.mu.Unlock() 311 for i, b := range n.branches { 312 if strings.EqualFold(branch, b) { 313 n.latestTableRoots[i][tableName] = h 314 break 315 } 316 } 317 }