github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/sqle/statspro/stats_provider.go (about)

     1  // Copyright 2023 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package statspro
    16  
    17  import (
    18  	"context"
    19  	"errors"
    20  	"fmt"
    21  	"strings"
    22  	"sync"
    23  
    24  	"github.com/dolthub/go-mysql-server/sql"
    25  
    26  	"github.com/dolthub/dolt/go/libraries/doltcore/env"
    27  	"github.com/dolthub/dolt/go/libraries/doltcore/sqle"
    28  	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
    29  	"github.com/dolthub/dolt/go/store/hash"
    30  	"github.com/dolthub/dolt/go/store/prolly/tree"
    31  )
    32  
    33  var ErrFailedToLoad = errors.New("failed to load statistics")
    34  
    35  type indexMeta struct {
    36  	qual     sql.StatQualifier
    37  	cols     []string
    38  	newNodes []tree.Node
    39  	// updateOrdinals are [start, stop] tuples for each update chunk
    40  	updateOrdinals []updateOrdinal
    41  	keepChunks     []sql.HistogramBucket
    42  	dropChunks     []sql.HistogramBucket
    43  	allAddrs       []hash.Hash
    44  }
    45  
    46  type updateOrdinal struct {
    47  	start, stop uint64
    48  }
    49  
    50  func NewProvider(pro *sqle.DoltDatabaseProvider, sf StatsFactory) *Provider {
    51  	return &Provider{
    52  		pro:       pro,
    53  		sf:        sf,
    54  		mu:        &sync.Mutex{},
    55  		statDbs:   make(map[string]Database),
    56  		cancelers: make(map[string]context.CancelFunc),
    57  		status:    make(map[string]string),
    58  	}
    59  }
    60  
    61  // Provider is the engine interface for reading and writing index statistics.
    62  // Each database has its own statistics table that all tables/indexes in a db
    63  // share.
    64  type Provider struct {
    65  	mu        *sync.Mutex
    66  	pro       *sqle.DoltDatabaseProvider
    67  	sf        StatsFactory
    68  	statDbs   map[string]Database
    69  	cancelers map[string]context.CancelFunc
    70  	starter   sqle.InitDatabaseHook
    71  	status    map[string]string
    72  }
    73  
    74  // each database has one statistics table that is a collection of the
    75  // table stats in the database
    76  type dbToStats struct {
    77  	mu                *sync.Mutex
    78  	dbName            string
    79  	stats             map[sql.StatQualifier]*DoltStats
    80  	statsDatabase     Database
    81  	latestTableHashes map[string]hash.Hash
    82  }
    83  
    84  func newDbStats(dbName string) *dbToStats {
    85  	return &dbToStats{
    86  		mu:                &sync.Mutex{},
    87  		dbName:            dbName,
    88  		stats:             make(map[sql.StatQualifier]*DoltStats),
    89  		latestTableHashes: make(map[string]hash.Hash),
    90  	}
    91  }
    92  
// Compile-time check that *Provider implements sql.StatsProvider.
var _ sql.StatsProvider = (*Provider)(nil)
    94  
    95  func (p *Provider) StartRefreshThread(ctx *sql.Context, pro dsess.DoltDatabaseProvider, name string, env *env.DoltEnv, db dsess.SqlDatabase) error {
    96  	err := p.starter(ctx, pro.(*sqle.DoltDatabaseProvider), name, env, db)
    97  	p.mu.Lock()
    98  	defer p.mu.Unlock()
    99  
   100  	if err != nil {
   101  		p.UpdateStatus(name, fmt.Sprintf("error restarting thread %s: %s", name, err.Error()))
   102  		return err
   103  	}
   104  	p.UpdateStatus(name, fmt.Sprintf("restarted thread: %s", name))
   105  	return nil
   106  }
   107  
   108  func (p *Provider) SetStarter(hook sqle.InitDatabaseHook) {
   109  	p.starter = hook
   110  }
   111  
   112  func (p *Provider) CancelRefreshThread(dbName string) {
   113  	p.mu.Lock()
   114  	defer p.mu.Unlock()
   115  	if cancel, ok := p.cancelers[dbName]; ok {
   116  		cancel()
   117  		p.UpdateStatus(dbName, fmt.Sprintf("cancelled thread: %s", dbName))
   118  	}
   119  }
   120  
   121  func (p *Provider) ThreadStatus(dbName string) string {
   122  	p.mu.Lock()
   123  	defer p.mu.Unlock()
   124  
   125  	if msg, ok := p.status[dbName]; ok {
   126  		return msg
   127  	}
   128  	return "no active stats thread"
   129  }
   130  
   131  func (p *Provider) GetTableStats(ctx *sql.Context, db string, table sql.Table) ([]sql.Statistic, error) {
   132  	dSess := dsess.DSessFromSess(ctx.Session)
   133  	branch, err := dSess.GetBranch()
   134  	if err != nil {
   135  		return nil, nil
   136  	}
   137  
   138  	// TODO: schema name
   139  	return p.GetTableDoltStats(ctx, branch, db, table.Name())
   140  }
   141  
   142  func (p *Provider) GetTableDoltStats(ctx *sql.Context, branch, db, table string) ([]sql.Statistic, error) {
   143  	p.mu.Lock()
   144  	defer p.mu.Unlock()
   145  
   146  	statDb, ok := p.getStatDb(db)
   147  	if !ok || statDb == nil {
   148  		return nil, nil
   149  	}
   150  
   151  	if branch == "" {
   152  		dSess := dsess.DSessFromSess(ctx.Session)
   153  		var err error
   154  		branch, err = dSess.GetBranch()
   155  		if err != nil {
   156  			return nil, nil
   157  		}
   158  	}
   159  
   160  	var ret []sql.Statistic
   161  	for _, qual := range statDb.ListStatQuals(branch) {
   162  		if strings.EqualFold(db, qual.Database) && strings.EqualFold(table, qual.Tab) {
   163  			stat, _ := statDb.GetStat(branch, qual)
   164  			ret = append(ret, stat)
   165  		}
   166  	}
   167  
   168  	return ret, nil
   169  }
   170  
   171  func (p *Provider) setStatDb(name string, db Database) {
   172  	p.statDbs[name] = db
   173  }
   174  
   175  func (p *Provider) getStatDb(name string) (Database, bool) {
   176  	statDb, ok := p.statDbs[strings.ToLower(name)]
   177  	return statDb, ok
   178  }
   179  
   180  func (p *Provider) SetStats(ctx *sql.Context, s sql.Statistic) error {
   181  	p.mu.Lock()
   182  	defer p.mu.Unlock()
   183  
   184  	statDb, ok := p.getStatDb(s.Qualifier().Db())
   185  	if !ok {
   186  		return nil
   187  	}
   188  
   189  	dSess := dsess.DSessFromSess(ctx.Session)
   190  	branch, err := dSess.GetBranch()
   191  	if err != nil {
   192  		return nil
   193  	}
   194  
   195  	doltStat, err := DoltStatsFromSql(s)
   196  	if err != nil {
   197  		return err
   198  	}
   199  
   200  	p.UpdateStatus(s.Qualifier().Db(), fmt.Sprintf("refreshed %s", s.Qualifier().Db()))
   201  
   202  	return statDb.SetStat(ctx, branch, s.Qualifier(), doltStat)
   203  }
   204  
   205  func (p *Provider) getQualStats(ctx *sql.Context, qual sql.StatQualifier) (*DoltStats, bool) {
   206  	statDb, ok := p.getStatDb(qual.Db())
   207  	if !ok {
   208  		return nil, false
   209  	}
   210  
   211  	dSess := dsess.DSessFromSess(ctx.Session)
   212  	branch, err := dSess.GetBranch()
   213  	if err != nil {
   214  		return nil, false
   215  	}
   216  
   217  	return statDb.GetStat(branch, qual)
   218  }
   219  
   220  func (p *Provider) GetStats(ctx *sql.Context, qual sql.StatQualifier, _ []string) (sql.Statistic, bool) {
   221  	p.mu.Lock()
   222  	defer p.mu.Unlock()
   223  
   224  	stat, ok := p.getQualStats(ctx, qual)
   225  	if !ok {
   226  		return nil, false
   227  	}
   228  	return stat, true
   229  }
   230  
   231  func (p *Provider) DropDbStats(ctx *sql.Context, db string, flush bool) error {
   232  	p.mu.Lock()
   233  	defer p.mu.Unlock()
   234  
   235  	statDb, ok := p.getStatDb(db)
   236  	if !ok {
   237  		return nil
   238  	}
   239  
   240  	dSess := dsess.DSessFromSess(ctx.Session)
   241  	branch, err := dSess.GetBranch()
   242  	if err != nil {
   243  		return err
   244  	}
   245  
   246  	// remove provider access
   247  	if err := statDb.DeleteBranchStats(ctx, branch, flush); err != nil {
   248  		return nil
   249  	}
   250  
   251  	p.status[db] = "dropped"
   252  
   253  	return nil
   254  }
   255  
   256  func (p *Provider) DropStats(ctx *sql.Context, qual sql.StatQualifier, _ []string) error {
   257  	p.mu.Lock()
   258  	defer p.mu.Unlock()
   259  
   260  	statDb, ok := p.getStatDb(qual.Db())
   261  	if !ok {
   262  		return nil
   263  	}
   264  
   265  	dSess := dsess.DSessFromSess(ctx.Session)
   266  	branch, err := dSess.GetBranch()
   267  	if err != nil {
   268  		return nil
   269  	}
   270  
   271  	if _, ok := statDb.GetStat(branch, qual); ok {
   272  		statDb.DeleteStats(branch, qual)
   273  		p.UpdateStatus(qual.Db(), fmt.Sprintf("dropped statisic: %s", qual.String()))
   274  	}
   275  
   276  	return nil
   277  }
   278  
   279  func (p *Provider) UpdateStatus(db string, msg string) {
   280  	p.status[db] = msg
   281  }
   282  
   283  func (p *Provider) RowCount(ctx *sql.Context, db string, table sql.Table) (uint64, error) {
   284  	p.mu.Lock()
   285  	defer p.mu.Unlock()
   286  
   287  	statDb, ok := p.getStatDb(db)
   288  	if !ok {
   289  		return 0, sql.ErrDatabaseNotFound.New(db)
   290  	}
   291  
   292  	dSess := dsess.DSessFromSess(ctx.Session)
   293  	branch, err := dSess.GetBranch()
   294  	if err != nil {
   295  		return 0, err
   296  	}
   297  
   298  	// TODO: schema name
   299  	priStats, ok := statDb.GetStat(branch, sql.NewStatQualifier(db, table.Name(), "primary"))
   300  	if !ok {
   301  		return 0, nil
   302  	}
   303  
   304  	return priStats.RowCount(), nil
   305  }
   306  
   307  func (p *Provider) DataLength(ctx *sql.Context, db string, table sql.Table) (uint64, error) {
   308  	p.mu.Lock()
   309  	defer p.mu.Unlock()
   310  
   311  	statDb, ok := p.getStatDb(db)
   312  	if !ok {
   313  		return 0, sql.ErrDatabaseNotFound.New(db)
   314  	}
   315  
   316  	dSess := dsess.DSessFromSess(ctx.Session)
   317  	branch, err := dSess.GetBranch()
   318  	if err != nil {
   319  		return 0, err
   320  	}
   321  
   322  	// TODO: schema name
   323  	priStats, ok := statDb.GetStat(branch, sql.NewStatQualifier(db, table.Name(), "primary"))
   324  	if !ok {
   325  		return 0, nil
   326  	}
   327  
   328  	return priStats.AvgSize(), nil
   329  }