github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/sqle/statsnoms/load.go (about)

     1  // Copyright 2024 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package statsnoms
    16  
    17  import (
    18  	"errors"
    19  	"fmt"
    20  	"io"
    21  	"strconv"
    22  	"strings"
    23  	"time"
    24  
    25  	"github.com/dolthub/go-mysql-server/sql"
    26  	"github.com/dolthub/go-mysql-server/sql/stats"
    27  
    28  	"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
    29  	"github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable"
    30  	"github.com/dolthub/dolt/go/libraries/doltcore/schema"
    31  	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
    32  	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro"
    33  	"github.com/dolthub/dolt/go/store/hash"
    34  	"github.com/dolthub/dolt/go/store/prolly"
    35  	"github.com/dolthub/dolt/go/store/prolly/tree"
    36  	"github.com/dolthub/dolt/go/store/val"
    37  )
    38  
    39  func loadStats(ctx *sql.Context, db dsess.SqlDatabase, m prolly.Map) (map[sql.StatQualifier]*statspro.DoltStats, error) {
    40  	qualToStats := make(map[sql.StatQualifier]*statspro.DoltStats)
    41  
    42  	iter, err := NewStatsIter(ctx, m)
    43  	if err != nil {
    44  		return nil, err
    45  	}
    46  	currentStat := statspro.NewDoltStats()
    47  	var lowerBound sql.Row
    48  	for {
    49  		row, err := iter.Next(ctx)
    50  		if errors.Is(err, io.EOF) {
    51  			break
    52  		} else if err != nil {
    53  			return nil, err
    54  		}
    55  
    56  		// deserialize K, V
    57  		dbName := row[schema.StatsDbTag].(string)
    58  		tableName := row[schema.StatsTableTag].(string)
    59  		indexName := row[schema.StatsIndexTag].(string)
    60  		_ = row[schema.StatsVersionTag]
    61  		commit := hash.Parse(row[schema.StatsCommitHashTag].(string))
    62  		rowCount := row[schema.StatsRowCountTag].(uint64)
    63  		distinctCount := row[schema.StatsDistinctCountTag].(uint64)
    64  		nullCount := row[schema.StatsNullCountTag].(uint64)
    65  		columns := strings.Split(row[schema.StatsColumnsTag].(string), ",")
    66  		typesStr := row[schema.StatsTypesTag].(string)
    67  		boundRowStr := row[schema.StatsUpperBoundTag].(string)
    68  		upperBoundCnt := row[schema.StatsUpperBoundCntTag].(uint64)
    69  		createdAt := row[schema.StatsCreatedAtTag].(time.Time)
    70  
    71  		typs := strings.Split(typesStr, ",")
    72  		for i, t := range typs {
    73  			typs[i] = strings.TrimSpace(t)
    74  		}
    75  
    76  		numMcvs := schema.StatsMcvCountsTag - schema.StatsMcv1Tag
    77  
    78  		mcvCountsStr := strings.Split(row[schema.StatsMcvCountsTag].(string), ",")
    79  		mcvCnts := make([]uint64, numMcvs)
    80  		for i, v := range mcvCountsStr {
    81  			val, err := strconv.Atoi(v)
    82  			if err != nil {
    83  				return nil, err
    84  			}
    85  			mcvCnts[i] = uint64(val)
    86  		}
    87  
    88  		mcvs := make([]sql.Row, numMcvs)
    89  		for i, v := range row[schema.StatsMcv1Tag:schema.StatsMcvCountsTag] {
    90  			if v != nil {
    91  				row, err := iter.ParseRow(v.(string))
    92  				if err != nil {
    93  					return nil, err
    94  				}
    95  				mcvs[i] = row
    96  			}
    97  		}
    98  
    99  		for i, v := range mcvCnts {
   100  			if v == 0 {
   101  				mcvs = mcvs[:i]
   102  				mcvCnts = mcvCnts[:i]
   103  				break
   104  			}
   105  		}
   106  
   107  		boundRow, err := iter.ParseRow(boundRowStr)
   108  		if err != nil {
   109  			return nil, err
   110  		}
   111  
   112  		qual := sql.NewStatQualifier(dbName, tableName, indexName)
   113  		if currentStat.Statistic.Qual.String() != qual.String() {
   114  			if !currentStat.Statistic.Qual.Empty() {
   115  				currentStat.Statistic.LowerBnd, err = loadLowerBound(ctx, currentStat.Statistic.Qual)
   116  				if err != nil {
   117  					return nil, err
   118  				}
   119  				fds, colSet, err := loadFuncDeps(ctx, db, currentStat.Statistic.Qual)
   120  				if err != nil {
   121  					return nil, err
   122  				}
   123  				currentStat.Statistic.Fds = fds
   124  				currentStat.Statistic.Colset = colSet
   125  				currentStat.UpdateActive()
   126  				qualToStats[currentStat.Statistic.Qual] = currentStat
   127  			}
   128  
   129  			currentStat = statspro.NewDoltStats()
   130  			currentStat.Statistic.Qual = qual
   131  			currentStat.Statistic.Cols = columns
   132  			currentStat.Statistic.LowerBnd = lowerBound
   133  		}
   134  
   135  		if currentStat.Statistic.Hist == nil {
   136  			currentStat.Statistic.Typs, err = stats.ParseTypeStrings(typs)
   137  			if err != nil {
   138  				return nil, err
   139  			}
   140  			currentStat.Statistic.Qual = qual
   141  		}
   142  
   143  		bucket := statspro.DoltBucket{
   144  			Chunk:   commit,
   145  			Created: createdAt,
   146  			Bucket: &stats.Bucket{
   147  				RowCnt:      uint64(rowCount),
   148  				DistinctCnt: uint64(distinctCount),
   149  				NullCnt:     uint64(nullCount),
   150  				McvVals:     mcvs,
   151  				McvsCnt:     mcvCnts,
   152  				BoundCnt:    upperBoundCnt,
   153  				BoundVal:    boundRow,
   154  			},
   155  		}
   156  
   157  		currentStat.Hist = append(currentStat.Hist, bucket)
   158  		currentStat.Statistic.RowCnt += uint64(rowCount)
   159  		currentStat.Statistic.DistinctCnt += uint64(distinctCount)
   160  		currentStat.Statistic.NullCnt += uint64(rowCount)
   161  		if currentStat.Statistic.Created.Before(createdAt) {
   162  			currentStat.Statistic.Created = createdAt
   163  		}
   164  	}
   165  	currentStat.Statistic.LowerBnd, err = loadLowerBound(ctx, currentStat.Statistic.Qual)
   166  	if err != nil {
   167  		return nil, err
   168  	}
   169  	fds, colSet, err := loadFuncDeps(ctx, db, currentStat.Statistic.Qual)
   170  	if err != nil {
   171  		return nil, err
   172  	}
   173  	currentStat.Statistic.Fds = fds
   174  	currentStat.Statistic.Colset = colSet
   175  	currentStat.UpdateActive()
   176  	qualToStats[currentStat.Statistic.Qual] = currentStat
   177  	return qualToStats, nil
   178  }
   179  
   180  func loadLowerBound(ctx *sql.Context, qual sql.StatQualifier) (sql.Row, error) {
   181  	dSess := dsess.DSessFromSess(ctx.Session)
   182  	roots, ok := dSess.GetRoots(ctx, qual.Db())
   183  	if !ok {
   184  		return nil, nil
   185  	}
   186  
   187  	table, ok, err := roots.Head.GetTable(ctx, doltdb.TableName{Name: qual.Table()})
   188  	if !ok {
   189  		return nil, nil
   190  	}
   191  	if err != nil {
   192  		return nil, err
   193  	}
   194  	idx, err := table.GetIndexRowData(ctx, qual.Index())
   195  	if err != nil {
   196  		return nil, err
   197  	}
   198  
   199  	prollyMap := durable.ProllyMapFromIndex(idx)
   200  	keyBuilder := val.NewTupleBuilder(prollyMap.KeyDesc())
   201  	buffPool := prollyMap.NodeStore().Pool()
   202  
   203  	firstIter, err := prollyMap.IterOrdinalRange(ctx, 0, 1)
   204  	if err != nil {
   205  		return nil, err
   206  	}
   207  	keyBytes, _, err := firstIter.Next(ctx)
   208  	if err != nil {
   209  		return nil, err
   210  	}
   211  	for i := range keyBuilder.Desc.Types {
   212  		keyBuilder.PutRaw(i, keyBytes.GetField(i))
   213  	}
   214  
   215  	firstKey := keyBuilder.Build(buffPool)
   216  	var firstRow sql.Row
   217  	for i := 0; i < keyBuilder.Desc.Count(); i++ {
   218  		firstRow[i], err = tree.GetField(ctx, prollyMap.KeyDesc(), i, firstKey, prollyMap.NodeStore())
   219  		if err != nil {
   220  			return nil, err
   221  		}
   222  	}
   223  	return firstRow, nil
   224  }
   225  
   226  func loadFuncDeps(ctx *sql.Context, db dsess.SqlDatabase, qual sql.StatQualifier) (*sql.FuncDepSet, sql.ColSet, error) {
   227  	tab, ok, err := db.GetTableInsensitive(ctx, qual.Table())
   228  	if err != nil {
   229  		return nil, sql.ColSet{}, err
   230  	} else if !ok {
   231  		return nil, sql.ColSet{}, fmt.Errorf("%w: table not found: '%s'", statspro.ErrFailedToLoad, qual.Table())
   232  	}
   233  
   234  	iat, ok := tab.(sql.IndexAddressable)
   235  	if !ok {
   236  		return nil, sql.ColSet{}, fmt.Errorf("%w: table does not have indexes: '%s'", statspro.ErrFailedToLoad, qual.Table())
   237  	}
   238  
   239  	indexes, err := iat.GetIndexes(ctx)
   240  	if err != nil {
   241  		return nil, sql.ColSet{}, err
   242  	}
   243  
   244  	var idx sql.Index
   245  	for _, i := range indexes {
   246  		if strings.EqualFold(i.ID(), qual.Index()) {
   247  			idx = i
   248  			break
   249  		}
   250  	}
   251  
   252  	if idx == nil {
   253  		return nil, sql.ColSet{}, fmt.Errorf("%w: index not found: '%s'", statspro.ErrFailedToLoad, qual.Index())
   254  	}
   255  
   256  	return stats.IndexFds(qual.Table(), tab.Schema(), idx)
   257  }