github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/sqle/statspro/auto_refresh.go

     1  // Copyright 2024 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package statspro
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"strings"
    21  	"time"
    22  
    23  	"github.com/dolthub/go-mysql-server/sql"
    24  	types2 "github.com/dolthub/go-mysql-server/sql/types"
    25  
    26  	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
    27  )
    28  
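// asyncAutoRefreshStats prefixes the name of each per-database statistics
// refresh thread registered with sql.BackgroundThreads (see bThreads.Add below).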
    29  const asyncAutoRefreshStats = "async_auto_refresh_stats"
    30  
    31  func (p *Provider) InitAutoRefresh(ctxFactory func(ctx context.Context) (*sql.Context, error), dbName string, bThreads *sql.BackgroundThreads) error {
    32  	_, threshold, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsAutoRefreshThreshold)
    33  	_, interval, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsAutoRefreshInterval)
    34  	interval64, _, _ := types2.Int64.Convert(interval)
    35  	intervalSec := time.Second * time.Duration(interval64.(int64))
    36  	thresholdf64 := threshold.(float64)
    37  
    38  	ctx, err := ctxFactory(context.Background())
    39  	if err != nil {
    40  		return err
    41  	}
    42  
    43  	branches := p.getStatsBranches(ctx)
    44  
    45  	return p.InitAutoRefreshWithParams(ctxFactory, dbName, bThreads, intervalSec, thresholdf64, branches)
    46  }
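
// A rough usage sketch for InitAutoRefresh, assuming a statspro.Provider named
// statsProv and illustrative engine wiring (the real server supplies a ctxFactory
// that attaches a dolt session): the check interval comes from the
// dsess.DoltStatsAutoRefreshInterval system variable (interpreted as seconds) and
// the update threshold from dsess.DoltStatsAutoRefreshThreshold.
//
//	bThreads := sql.NewBackgroundThreads()
//
//	ctxFactory := func(ctx context.Context) (*sql.Context, error) {
//		// hypothetical factory; the server's version builds a session-backed *sql.Context
//		return sql.NewContext(ctx), nil
//	}
//
//	if err := statsProv.InitAutoRefresh(ctxFactory, "mydb", bThreads); err != nil {
//		// auto-refresh for "mydb" could not be started
//	}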
    47  
    48  func (p *Provider) InitAutoRefreshWithParams(ctxFactory func(ctx context.Context) (*sql.Context, error), dbName string, bThreads *sql.BackgroundThreads, checkInterval time.Duration, updateThresh float64, branches []string) error {
    49  	// this is only called after initial statistics are finished loading
    50  	// launch a thread that periodically checks freshness
    51  
    52  	p.mu.Lock()
    53  	defer p.mu.Unlock()
    54  
    55  	dropDbCtx, dbStatsCancel := context.WithCancel(context.Background())
    56  	p.cancelers[dbName] = dbStatsCancel
    57  
    58  	return bThreads.Add(fmt.Sprintf("%s_%s", asyncAutoRefreshStats, dbName), func(ctx context.Context) {
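		// the extra nanosecond presumably guards against a zero checkInterval;
		// time.NewTicker panics if the duration is not positive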
    59  		ticker := time.NewTicker(checkInterval + time.Nanosecond)
    60  		for {
    61  			select {
    62  			case <-ctx.Done():
    63  				ticker.Stop()
    64  				return
    65  			case <-ticker.C:
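				// non-blocking check of the per-database cancel: dropDbCtx is closed via
				// the cancel func stored in p.cancelers (the name suggests DROP DATABASE),
				// independently of the background-thread ctx handled above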
    66  				select {
    67  				case <-dropDbCtx.Done():
    68  					ticker.Stop()
    69  					return
    70  				default:
    71  				}
    72  
    73  				sqlCtx, err := ctxFactory(ctx)
    74  				if err != nil {
    75  					return
    76  				}
    77  
    78  				dSess := dsess.DSessFromSess(sqlCtx.Session)
    79  				ddb, ok := dSess.GetDoltDB(sqlCtx, dbName)
    80  				if !ok {
    81  					sqlCtx.GetLogger().Debugf("statistics refresh error: database not found %s", dbName)
    82  					return
    83  				}
    84  				for _, branch := range branches {
    85  					if br, ok, err := ddb.HasBranch(ctx, branch); ok {
    86  						sqlCtx.GetLogger().Debugf("starting statistics refresh check for '%s': %s", dbName, time.Now().String())
    87  						// update WORKING session references
    88  						sqlDb, err := dSess.Provider().Database(sqlCtx, p.branchQualifiedDatabase(dbName, branch))
    89  						if err != nil {
    90  							sqlCtx.GetLogger().Debugf("statistics refresh error: %s", err.Error())
    91  							return
    92  						}
    93  
    94  						if err := p.checkRefresh(sqlCtx, sqlDb, dbName, br, updateThresh); err != nil {
    95  							sqlCtx.GetLogger().Debugf("statistics refresh error: %s", err.Error())
    96  							return
    97  						}
    98  					} else if err != nil {
    99  						sqlCtx.GetLogger().Debugf("statistics refresh error: branch check error %s", err.Error())
   100  					} else {
    101  					sqlCtx.GetLogger().Debugf("statistics refresh error: branch not found %s", branch)
   102  					}
   103  				}
   104  			}
   105  		}
   106  	})
   107  }
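
// The loop above layers two cancellation signals: the ctx handed to the
// background thread (closed when the server's BackgroundThreads shut down) and
// dropDbCtx (closed when the cancel func stored in p.cancelers[dbName] is
// invoked). A minimal standalone sketch of that pattern, with placeholder names:
//
//	func runUntilCancelled(threadCtx, dbCtx context.Context, interval time.Duration, work func()) {
//		ticker := time.NewTicker(interval)
//		defer ticker.Stop()
//		for {
//			select {
//			case <-threadCtx.Done(): // background-thread shutdown
//				return
//			case <-ticker.C:
//				select {
//				case <-dbCtx.Done(): // this database's refresher was cancelled
//					return
//				default:
//				}
//				work() // one freshness check
//			}
//		}
//	}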
   108  
   109  func (p *Provider) checkRefresh(ctx *sql.Context, sqlDb sql.Database, dbName, branch string, updateThresh float64) error {
   110  	p.mu.Lock()
   111  	defer p.mu.Unlock()
   112  
    113  	// Iterate the database's tables and indexes. Collect an []indexMeta for
    114  	// each index whose change ratio exceeds the refresh threshold, read and
    115  	// process those chunks' statistics, and merge the updated chunks with the
    116  	// precomputed chunks. The full set of statistics for each database lands
    117  	// 1) in the provider's most recent set of database statistics, and
    118  	// 2) on disk in the database's statistics ref'd prolly.Map.
   119  	statDb, ok := p.getStatDb(dbName)
   120  	if !ok {
   121  		return sql.ErrDatabaseNotFound.New(dbName)
   122  	}
   123  
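	// qualExists records every index qualifier seen during this pass, and
	// tableExistsAndSkipped records tables whose row data hash was unchanged;
	// both guard the stale-statistics deletion loop at the end of this function.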
   124  	var deletedStats []sql.StatQualifier
   125  	qualExists := make(map[sql.StatQualifier]bool)
   126  	tableExistsAndSkipped := make(map[string]bool)
   127  
   128  	tables, err := sqlDb.GetTableNames(ctx)
   129  	if err != nil {
   130  		return err
   131  	}
   132  
   133  	for _, table := range tables {
   134  		sqlTable, dTab, err := GetLatestTable(ctx, table, sqlDb)
   135  		if err != nil {
   136  			return err
   137  		}
   138  
   139  		tableHash, err := dTab.GetRowDataHash(ctx)
   140  		if err != nil {
   141  			return err
   142  		}
   143  
   144  		if statDb.GetLatestHash(branch, table) == tableHash {
   145  			// no data changes since last check
   146  			tableExistsAndSkipped[table] = true
   147  			ctx.GetLogger().Debugf("statistics refresh: table hash unchanged since last check: %s", tableHash)
   148  			continue
   149  		} else {
   150  			ctx.GetLogger().Debugf("statistics refresh: new table hash: %s", tableHash)
   151  		}
   152  
   153  		iat, ok := sqlTable.(sql.IndexAddressableTable)
   154  		if !ok {
    155  			return fmt.Errorf("table does not support indexes: %s", table)
   156  		}
   157  
   158  		indexes, err := iat.GetIndexes(ctx)
   159  		if err != nil {
   160  			return err
   161  		}
   162  
   163  		// collect indexes and ranges to be updated
   164  		var idxMetas []indexMeta
   165  		for _, index := range indexes {
   166  			qual := sql.NewStatQualifier(dbName, table, strings.ToLower(index.ID()))
   167  			qualExists[qual] = true
   168  			curStat, ok := statDb.GetStat(branch, qual)
   169  			if !ok {
   170  				curStat = NewDoltStats()
   171  				curStat.Statistic.Qual = qual
   172  
   173  				cols := make([]string, len(index.Expressions()))
   174  				tablePrefix := fmt.Sprintf("%s.", table)
   175  				for i, c := range index.Expressions() {
   176  					cols[i] = strings.TrimPrefix(strings.ToLower(c), tablePrefix)
   177  				}
   178  				curStat.Statistic.Cols = cols
   179  			}
   180  			ctx.GetLogger().Debugf("statistics refresh index: %s", qual.String())
   181  
   182  			updateMeta, err := newIdxMeta(ctx, curStat, dTab, index, curStat.Columns())
   183  			if err != nil {
   184  				ctx.GetLogger().Debugf("statistics refresh error: %s", err.Error())
   185  				continue
   186  			}
   187  			curCnt := float64(len(curStat.Active))
   188  			updateCnt := float64(len(updateMeta.newNodes))
   189  			deleteCnt := float64(len(curStat.Active) - len(updateMeta.keepChunks))
   190  			ctx.GetLogger().Debugf("statistics current: %d, new: %d, delete: %d", int(curCnt), int(updateCnt), int(deleteCnt))
   191  
   192  			if curCnt == 0 || (deleteCnt+updateCnt)/curCnt > updateThresh {
   193  				if curCnt == 0 && updateCnt == 0 {
   194  					continue
   195  				}
   196  				ctx.GetLogger().Debugf("statistics updating: %s", updateMeta.qual)
   197  				// mark index for updating
   198  				idxMetas = append(idxMetas, updateMeta)
    199  				// update latest hash if we haven't already
   200  				statDb.SetLatestHash(branch, table, tableHash)
   201  			}
   202  		}
   203  
   204  		// get new buckets for index chunks to update
   205  		newTableStats, err := createNewStatsBuckets(ctx, sqlTable, dTab, indexes, idxMetas)
   206  		if err != nil {
   207  			return err
   208  		}
   209  
   210  		// merge new chunks with preexisting chunks
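		// (a qualifier seen for the first time stores the whole statistic via SetStat;
		// an existing one presumably has only its changed buckets spliced in via ReplaceChunks)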
   211  		for _, updateMeta := range idxMetas {
   212  			stat := newTableStats[updateMeta.qual]
   213  			if stat != nil {
   214  				var err error
   215  				if _, ok := statDb.GetStat(branch, updateMeta.qual); !ok {
   216  					err = statDb.SetStat(ctx, branch, updateMeta.qual, stat)
   217  				} else {
   218  					err = statDb.ReplaceChunks(ctx, branch, updateMeta.qual, updateMeta.allAddrs, updateMeta.dropChunks, stat.Hist)
   219  				}
   220  				if err != nil {
   221  					return err
   222  				}
   223  				p.UpdateStatus(dbName, fmt.Sprintf("refreshed %s", dbName))
   224  			}
   225  		}
   226  	}
   227  
   228  	for _, q := range statDb.ListStatQuals(branch) {
   229  		// table or index delete leaves hole in stats
   230  		// this is separate from threshold check
   231  		if !tableExistsAndSkipped[q.Table()] && !qualExists[q] {
   232  			// only delete stats we've verified are deleted
   233  			deletedStats = append(deletedStats, q)
   234  		}
   235  	}
   236  
   237  	statDb.DeleteStats(branch, deletedStats...)
   238  
   239  	if err := statDb.Flush(ctx, branch); err != nil {
   240  		return err
   241  	}
   242  
   243  	return nil
   244  }
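
// A worked example of the refresh decision in checkRefresh, assuming
// updateThresh = 0.5: an index whose previous statistic covered 10 chunks
// (curCnt = 10), with 3 chunks to read (updateCnt = 3) and 4 chunks no longer
// kept (deleteCnt = 4), gives (4+3)/10 = 0.7 > 0.5, so new buckets are built
// for it; with only 1 new and 1 dropped chunk the ratio is 0.2 and the existing
// statistic is kept. An index with no prior statistic (curCnt = 0) is refreshed
// whenever it has at least one new chunk to read.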