github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/sqle/statsnoms/database.go (about)

     1  // Copyright 2024 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package statsnoms
    16  
    17  import (
    18  	"context"
    19  	"errors"
    20  	"fmt"
    21  	"path"
    22  	"strings"
    23  	"sync"
    24  
    25  	"github.com/dolthub/go-mysql-server/sql"
    26  
    27  	"github.com/dolthub/dolt/go/libraries/doltcore/dbfactory"
    28  	"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
    29  	"github.com/dolthub/dolt/go/libraries/doltcore/env"
    30  	"github.com/dolthub/dolt/go/libraries/doltcore/schema"
    31  	"github.com/dolthub/dolt/go/libraries/doltcore/sqle"
    32  	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
    33  	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro"
    34  	"github.com/dolthub/dolt/go/libraries/doltcore/table/editor"
    35  	"github.com/dolthub/dolt/go/libraries/utils/earl"
    36  	"github.com/dolthub/dolt/go/libraries/utils/filesys"
    37  	"github.com/dolthub/dolt/go/store/hash"
    38  	"github.com/dolthub/dolt/go/store/prolly"
    39  	"github.com/dolthub/dolt/go/store/types"
    40  )
    41  
    42  func NewNomsStatsFactory(dialPro dbfactory.GRPCDialProvider) *NomsStatsFactory {
    43  	return &NomsStatsFactory{dialPro: dialPro}
    44  }
    45  
    46  type NomsStatsFactory struct {
    47  	dialPro dbfactory.GRPCDialProvider
    48  }
    49  
    50  var _ statspro.StatsFactory = NomsStatsFactory{}
    51  
    52  func (sf NomsStatsFactory) Init(ctx *sql.Context, sourceDb dsess.SqlDatabase, prov *sqle.DoltDatabaseProvider, fs filesys.Filesys, hdp env.HomeDirProvider) (statspro.Database, error) {
    53  	params := make(map[string]interface{})
    54  	params[dbfactory.GRPCDialProviderParam] = sf.dialPro
    55  
    56  	var urlPath string
    57  	u, err := earl.Parse(prov.DbFactoryUrl())
    58  	if u.Scheme == dbfactory.MemScheme {
    59  		urlPath = path.Join(prov.DbFactoryUrl(), dbfactory.DoltDataDir)
    60  	} else if u.Scheme == dbfactory.FileScheme {
    61  		urlPath = doltdb.LocalDirDoltDB
    62  	}
    63  
    64  	statsFs, err := fs.WithWorkingDir(dbfactory.DoltStatsDir)
    65  	if err != nil {
    66  		return nil, err
    67  	}
    68  
    69  	var dEnv *env.DoltEnv
    70  	exists, isDir := statsFs.Exists("")
    71  	if !exists {
    72  		err := statsFs.MkDirs("")
    73  		if err != nil {
    74  			return nil, fmt.Errorf("unable to make directory '%s', cause: %s", dbfactory.DoltStatsDir, err.Error())
    75  		}
    76  
    77  		dEnv = env.Load(context.Background(), hdp, statsFs, urlPath, "test")
    78  		sess := dsess.DSessFromSess(ctx.Session)
    79  		err = dEnv.InitRepo(ctx, types.Format_Default, sess.Username(), sess.Email(), prov.DefaultBranch())
    80  		if err != nil {
    81  			return nil, err
    82  		}
    83  	} else if !isDir {
    84  		return nil, fmt.Errorf("file exists where the dolt stats directory should be")
    85  	} else {
    86  		dEnv = env.LoadWithoutDB(ctx, hdp, statsFs, "")
    87  	}
    88  
    89  	ddb, err := doltdb.LoadDoltDBWithParams(ctx, types.Format_Default, urlPath, statsFs, params)
    90  	if err != nil {
    91  		return nil, err
    92  	}
    93  
    94  	dEnv.DoltDB = ddb
    95  
    96  	deaf := dEnv.DbEaFactory()
    97  
    98  	tmpDir, err := dEnv.TempTableFilesDir()
    99  	if err != nil {
   100  		return nil, err
   101  	}
   102  	opts := editor.Options{
   103  		Deaf:    deaf,
   104  		Tempdir: tmpDir,
   105  	}
   106  	statsDb, err := sqle.NewDatabase(ctx, "stats", dEnv.DbData(), opts)
   107  	if err != nil {
   108  		return nil, err
   109  	}
   110  	return NewNomsStats(sourceDb, statsDb), nil
   111  }
   112  
   113  func NewNomsStats(sourceDb, statsDb dsess.SqlDatabase) *NomsStatsDatabase {
   114  	return &NomsStatsDatabase{mu: &sync.Mutex{}, destDb: statsDb, sourceDb: sourceDb}
   115  }
   116  
   117  type dbStats map[sql.StatQualifier]*statspro.DoltStats
   118  
   119  type NomsStatsDatabase struct {
   120  	mu               *sync.Mutex
   121  	destDb           dsess.SqlDatabase
   122  	sourceDb         dsess.SqlDatabase
   123  	stats            []dbStats
   124  	branches         []string
   125  	latestTableRoots []map[string]hash.Hash
   126  	dirty            []*prolly.MutableMap
   127  }
   128  
   129  var _ statspro.Database = (*NomsStatsDatabase)(nil)
   130  
   131  func (n *NomsStatsDatabase) Close() error {
   132  	return n.destDb.DbData().Ddb.Close()
   133  }
   134  
   135  func (n *NomsStatsDatabase) LoadBranchStats(ctx *sql.Context, branch string) error {
   136  	statsMap, err := n.destDb.DbData().Ddb.GetStatistics(ctx, branch)
   137  	if errors.Is(err, doltdb.ErrNoStatistics) {
   138  		return nil
   139  	} else if err != nil {
   140  		return err
   141  	}
   142  	doltStats, err := loadStats(ctx, n.sourceDb, statsMap)
   143  	if err != nil {
   144  		return err
   145  	}
   146  	n.branches = append(n.branches, branch)
   147  	n.stats = append(n.stats, doltStats)
   148  	n.dirty = append(n.dirty, nil)
   149  	n.latestTableRoots = append(n.latestTableRoots, make(map[string]hash.Hash))
   150  	return nil
   151  }
   152  
   153  func (n *NomsStatsDatabase) getBranchStats(branch string) dbStats {
   154  	for i, b := range n.branches {
   155  		if strings.EqualFold(b, branch) {
   156  			return n.stats[i]
   157  		}
   158  	}
   159  	return nil
   160  }
   161  
   162  func (n *NomsStatsDatabase) GetStat(branch string, qual sql.StatQualifier) (*statspro.DoltStats, bool) {
   163  	stats := n.getBranchStats(branch)
   164  	ret, ok := stats[qual]
   165  	return ret, ok
   166  }
   167  
   168  func (n *NomsStatsDatabase) ListStatQuals(branch string) []sql.StatQualifier {
   169  	stats := n.getBranchStats(branch)
   170  	var ret []sql.StatQualifier
   171  	for qual, _ := range stats {
   172  		ret = append(ret, qual)
   173  	}
   174  	return ret
   175  }
   176  
   177  func (n *NomsStatsDatabase) SetStat(ctx context.Context, branch string, qual sql.StatQualifier, stats *statspro.DoltStats) error {
   178  	var statsMap *prolly.MutableMap
   179  	for i, b := range n.branches {
   180  		if strings.EqualFold(branch, b) {
   181  			n.stats[i][qual] = stats
   182  			if n.dirty[i] == nil {
   183  				n.initMutable(ctx, i)
   184  			}
   185  			statsMap = n.dirty[i]
   186  		}
   187  	}
   188  	if statsMap == nil {
   189  		if err := n.trackBranch(ctx, branch); err != nil {
   190  			return err
   191  		}
   192  		statsMap = n.dirty[len(n.branches)-1]
   193  		n.stats[len(n.branches)-1][qual] = stats
   194  	}
   195  
   196  	return n.replaceStats(ctx, statsMap, stats)
   197  }
   198  
   199  func (n *NomsStatsDatabase) trackBranch(ctx context.Context, branch string) error {
   200  	n.branches = append(n.branches, branch)
   201  	n.stats = append(n.stats, make(dbStats))
   202  	n.latestTableRoots = append(n.latestTableRoots, make(map[string]hash.Hash))
   203  
   204  	kd, vd := schema.StatsTableDoltSchema.GetMapDescriptors()
   205  	newMap, err := prolly.NewMapFromTuples(ctx, n.destDb.DbData().Ddb.NodeStore(), kd, vd)
   206  	if err != nil {
   207  		return err
   208  	}
   209  	n.dirty = append(n.dirty, newMap.Mutate())
   210  	return n.destDb.DbData().Ddb.SetStatisics(ctx, branch, newMap.HashOf())
   211  }
   212  
   213  func (n *NomsStatsDatabase) initMutable(ctx context.Context, i int) error {
   214  	statsMap, err := n.destDb.DbData().Ddb.GetStatistics(ctx, n.branches[i])
   215  	if err != nil {
   216  		return err
   217  	}
   218  	n.dirty[i] = statsMap.Mutate()
   219  	return nil
   220  }
   221  
   222  func (n *NomsStatsDatabase) DeleteStats(branch string, quals ...sql.StatQualifier) {
   223  	for i, b := range n.branches {
   224  		if strings.EqualFold(b, branch) {
   225  			for _, qual := range quals {
   226  				delete(n.stats[i], qual)
   227  			}
   228  		}
   229  	}
   230  }
   231  
   232  func (n *NomsStatsDatabase) DeleteBranchStats(ctx context.Context, branch string, flush bool) error {
   233  	for i, b := range n.branches {
   234  		if strings.EqualFold(b, branch) {
   235  			n.branches = append(n.branches[:i], n.branches[i+1:]...)
   236  			n.dirty = append(n.dirty[:i], n.dirty[i+1:]...)
   237  			n.stats = append(n.stats[:i], n.stats[i+1:]...)
   238  			n.latestTableRoots = append(n.latestTableRoots[:i], n.latestTableRoots[i+1:]...)
   239  		}
   240  	}
   241  	if flush {
   242  		return n.destDb.DbData().Ddb.DropStatisics(ctx, branch)
   243  	}
   244  	return nil
   245  }
   246  
   247  func (n *NomsStatsDatabase) ReplaceChunks(ctx context.Context, branch string, qual sql.StatQualifier, targetHashes []hash.Hash, dropChunks, newChunks []sql.HistogramBucket) error {
   248  	var dbStat dbStats
   249  	for i, b := range n.branches {
   250  		if strings.EqualFold(b, branch) {
   251  			// naive merge the new with old
   252  			dbStat = n.stats[i]
   253  		}
   254  	}
   255  
   256  	if dbStat == nil {
   257  		if err := n.trackBranch(ctx, branch); err != nil {
   258  			return err
   259  		}
   260  		dbStat = n.stats[len(n.branches)-1]
   261  	}
   262  
   263  	if _, ok := dbStat[qual]; ok {
   264  		oldChunks := dbStat[qual].Hist
   265  		targetBuckets, err := statspro.MergeNewChunks(targetHashes, oldChunks, newChunks)
   266  		if err != nil {
   267  			return err
   268  		}
   269  		dbStat[qual].Hist = targetBuckets
   270  	} else {
   271  		dbStat[qual] = statspro.NewDoltStats()
   272  	}
   273  	dbStat[qual].Chunks = targetHashes
   274  	dbStat[qual].UpdateActive()
   275  
   276  	// let |n.SetStats| update memory and disk
   277  	return n.SetStat(ctx, branch, qual, dbStat[qual])
   278  }
   279  
   280  func (n *NomsStatsDatabase) Flush(ctx context.Context, branch string) error {
   281  	for i, b := range n.branches {
   282  		if strings.EqualFold(b, branch) {
   283  			if n.dirty[i] != nil {
   284  				flushedMap, err := n.dirty[i].Map(ctx)
   285  				if err != nil {
   286  					return err
   287  				}
   288  				n.dirty[i] = nil
   289  				n.destDb.DbData().Ddb.SetStatisics(ctx, branch, flushedMap.HashOf())
   290  				return nil
   291  			}
   292  		}
   293  	}
   294  	return nil
   295  }
   296  
   297  func (n *NomsStatsDatabase) GetLatestHash(branch, tableName string) hash.Hash {
   298  	n.mu.Lock()
   299  	defer n.mu.Unlock()
   300  	for i, b := range n.branches {
   301  		if strings.EqualFold(branch, b) {
   302  			return n.latestTableRoots[i][tableName]
   303  		}
   304  	}
   305  	return hash.Hash{}
   306  }
   307  
   308  func (n *NomsStatsDatabase) SetLatestHash(branch, tableName string, h hash.Hash) {
   309  	n.mu.Lock()
   310  	defer n.mu.Unlock()
   311  	for i, b := range n.branches {
   312  		if strings.EqualFold(branch, b) {
   313  			n.latestTableRoots[i][tableName] = h
   314  			break
   315  		}
   316  	}
   317  }