github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/sqle/statspro/analyze.go

// Copyright 2024 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package statspro

import (
	"fmt"
	"strings"

	"github.com/dolthub/go-mysql-server/sql"

	"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
	"github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable"
	"github.com/dolthub/dolt/go/libraries/doltcore/env"
	"github.com/dolthub/dolt/go/libraries/doltcore/sqle"
	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
	"github.com/dolthub/dolt/go/store/hash"
	"github.com/dolthub/dolt/go/store/prolly/tree"
)

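// RefreshTableStats rebuilds the histogram statistics for every index on the
// given table in |db|. Buckets belonging to chunks that are unchanged since
// the last refresh are reused; only new or modified chunks are rescanned.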
func (p *Provider) RefreshTableStats(ctx *sql.Context, table sql.Table, db string) error {
	dSess := dsess.DSessFromSess(ctx.Session)
	branch, err := dSess.GetBranch()
	if err != nil {
		return err
	}

	sqlDb, err := dSess.Provider().Database(ctx, p.branchQualifiedDatabase(db, branch))
	if err != nil {
		return err
	}

	// lock only after accessing DatabaseProvider
	p.mu.Lock()
	defer p.mu.Unlock()

	tableName := strings.ToLower(table.Name())
	dbName := strings.ToLower(db)

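	// tables that do not expose addressable indexes have no index statistics to collect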
	iat, ok := table.(sql.IndexAddressableTable)
	if !ok {
		return nil
	}
	indexes, err := iat.GetIndexes(ctx)
	if err != nil {
		return err
	}

	// it's important to update the WORKING session references on every call
	sqlTable, dTab, err := GetLatestTable(ctx, tableName, sqlDb)
	if err != nil {
		return err
	}

	statDb, ok := p.getStatDb(dbName)
	if !ok {
		// if the stats database does not exist, initialize one
		fs, err := p.pro.FileSystemForDatabase(dbName)
		if err != nil {
			return err
		}
		sourceDb, ok := p.pro.BaseDatabase(ctx, dbName)
		if !ok {
			return sql.ErrDatabaseNotFound.New(dbName)
		}
		statDb, err = p.sf.Init(ctx, sourceDb, p.pro, fs, env.GetCurrentUserHomeDir)
		if err != nil {
			ctx.Warn(0, err.Error())
			return nil
		}
		p.setStatDb(dbName, statDb)
	}

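	// build an indexMeta for each index, normalizing index expressions to
	// lowercased column names by trimming the "table." prefix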
	tablePrefix := fmt.Sprintf("%s.", tableName)
	var idxMetas []indexMeta
	for _, idx := range indexes {
		cols := make([]string, len(idx.Expressions()))
		for i, c := range idx.Expressions() {
			cols[i] = strings.TrimPrefix(strings.ToLower(c), tablePrefix)
		}

		qual := sql.NewStatQualifier(db, table.Name(), strings.ToLower(idx.ID()))
		curStat, ok := statDb.GetStat(branch, qual)
		if !ok {
			curStat = NewDoltStats()
			curStat.Statistic.Qual = qual
		}
		idxMeta, err := newIdxMeta(ctx, curStat, dTab, idx, cols)
		if err != nil {
			return err
		}
		idxMetas = append(idxMetas, idxMeta)
	}

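	// build histogram buckets for the chunk ranges gathered above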
	newTableStats, err := createNewStatsBuckets(ctx, sqlTable, dTab, indexes, idxMetas)
	if err != nil {
		return err
	}

	// merge new chunks with preexisting chunks
	for _, idxMeta := range idxMetas {
		stat := newTableStats[idxMeta.qual]
		targetChunks, err := MergeNewChunks(idxMeta.allAddrs, idxMeta.keepChunks, stat.Hist)
		if err != nil {
			return err
		}
		if targetChunks == nil {
			// empty table
			continue
		}
		stat.Chunks = idxMeta.allAddrs
		stat.Hist = targetChunks
		stat.UpdateActive()
		if err := statDb.SetStat(ctx, branch, idxMeta.qual, stat); err != nil {
			return err
		}
	}

	p.UpdateStatus(dbName, fmt.Sprintf("refreshed %s", dbName))
	return statDb.Flush(ctx, branch)
}

// branchQualifiedDatabase returns the branch-qualified name for a database. If
// the name already carries the branch suffix, it is returned unchanged.
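// For example, ("mydb", "main") yields "mydb/main", while "mydb/main" is left as-is.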
func (p *Provider) branchQualifiedDatabase(db, branch string) string {
	suffix := fmt.Sprintf("/%s", branch)
	if !strings.HasSuffix(db, suffix) {
		return fmt.Sprintf("%s%s", db, suffix)
	}
	return db
}

// GetLatestTable returns the sql.Table and doltdb.Table from the WORKING root for the current database/branch.
func GetLatestTable(ctx *sql.Context, tableName string, sqlDb sql.Database) (sql.Table, *doltdb.Table, error) {
	sqlTable, ok, err := sqlDb.(sqle.Database).GetTableInsensitive(ctx, tableName)
	if err != nil {
		return nil, nil, err
	}
	if !ok {
		return nil, nil, fmt.Errorf("statistics refresh error: table not found %s", tableName)
	}

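	// unwrap the underlying doltdb.Table from the sql.Table wrapper type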
	var dTab *doltdb.Table
	switch t := sqlTable.(type) {
	case *sqle.AlterableDoltTable:
		dTab, err = t.DoltTable.DoltTable(ctx)
	case *sqle.WritableDoltTable:
		dTab, err = t.DoltTable.DoltTable(ctx)
	case *sqle.DoltTable:
		dTab, err = t.DoltTable(ctx)
	default:
		err = fmt.Errorf("failed to unwrap dolt table from type: %T", sqlTable)
	}
	if err != nil {
		return nil, nil, err
	}
	return sqlTable, dTab, nil
}

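// newIdxMeta compares the chunks referenced by the previous statistics for an
// index against the chunks in the index's current tree, partitioning them into
// buckets to keep, chunks that still need to be scanned, and stale buckets to
// drop.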
func newIdxMeta(ctx *sql.Context, curStats *DoltStats, doltTable *doltdb.Table, sqlIndex sql.Index, cols []string) (indexMeta, error) {
	var idx durable.Index
	var err error
	if strings.EqualFold(sqlIndex.ID(), "PRIMARY") {
		idx, err = doltTable.GetRowData(ctx)
	} else {
		idx, err = doltTable.GetIndexRowData(ctx, sqlIndex.ID())
	}
	if err != nil {
		return indexMeta{}, err
	}

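	// both the clustered (PRIMARY) index and secondary indexes are read through
	// the prolly map backing the durable index; an empty map has no buckets to build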
	prollyMap := durable.ProllyMapFromIndex(idx)

	if cnt, err := prollyMap.Count(); err != nil {
		return indexMeta{}, err
	} else if cnt == 0 {
		return indexMeta{
			qual: curStats.Statistic.Qual,
			cols: cols,
		}, nil
	}

	// get newest histogram target level hashes
	levelNodes, err := tree.GetHistogramLevel(ctx, prollyMap.Tuples(), bucketLowCnt)
	if err != nil {
		return indexMeta{}, err
	}

	var addrs []hash.Hash
	var keepChunks []sql.HistogramBucket
	var missingAddrs float64
	var missingChunks []tree.Node
	var missingOffsets []updateOrdinal
	var offset uint64

	for _, n := range levelNodes {
		// Compare the previous histogram chunks to the newest tree chunks.
		// Partition the newest chunks into 1) preserved or 2) missing.
		// Missing chunks will need to be scanned on a stats update, so
		// track the (start, end) ordinal offsets to simplify the read iter.
		treeCnt, err := n.TreeCount()
		if err != nil {
			return indexMeta{}, err
		}

		addrs = append(addrs, n.HashOf())
		if bucketIdx, ok := curStats.Active[n.HashOf()]; !ok {
			missingChunks = append(missingChunks, n)
			missingOffsets = append(missingOffsets, updateOrdinal{offset, offset + uint64(treeCnt)})
			missingAddrs++
		} else {
			keepChunks = append(keepChunks, curStats.Hist[bucketIdx])
		}
		offset += uint64(treeCnt)
	}

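	// any chunk from the previous statistics that was not preserved above is
	// stale; collect its bucket so it can be dropped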
	var dropChunks []sql.HistogramBucket
	for _, h := range curStats.Chunks {
		var match bool
		for _, b := range keepChunks {
			if DoltBucketChunk(b) == h {
				match = true
				break
			}
		}
		if !match {
			dropChunks = append(dropChunks, curStats.Hist[curStats.Active[h]])
		}
	}

	return indexMeta{
		qual:           curStats.Statistic.Qual,
		cols:           cols,
		newNodes:       missingChunks,
		updateOrdinals: missingOffsets,
		keepChunks:     keepChunks,
		dropChunks:     dropChunks,
		allAddrs:       addrs,
	}, nil
}