github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/sqle/statsnoms/write.go (about)

     1  // Copyright 2024 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package statsnoms
    16  
    17  import (
    18  	"context"
    19  	"errors"
    20  	"fmt"
    21  	"io"
    22  	"strings"
    23  
    24  	"github.com/dolthub/go-mysql-server/sql"
    25  	"github.com/dolthub/go-mysql-server/sql/stats"
    26  
    27  	"github.com/dolthub/dolt/go/libraries/doltcore/schema"
    28  	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro"
    29  	"github.com/dolthub/dolt/go/store/prolly"
    30  	"github.com/dolthub/dolt/go/store/val"
    31  )
    32  
    33  // Roughly 200 20-byte addresses fit in a ~4k chunk. Chunk sizes
    34  // are approximate, but certainly shouldn't reach the square
    35  // of the expected size.
    36  const maxBucketFanout = 200 * 200
    37  
    38  func (n *NomsStatsDatabase) replaceStats(ctx context.Context, statsMap *prolly.MutableMap, dStats *statspro.DoltStats) error {
    39  	if err := deleteIndexRows(ctx, statsMap, dStats); err != nil {
    40  		return err
    41  	}
    42  	return putIndexRows(ctx, statsMap, dStats)
    43  }
    44  
    45  func deleteIndexRows(ctx context.Context, statsMap *prolly.MutableMap, dStats *statspro.DoltStats) error {
    46  	sch := schema.StatsTableDoltSchema
    47  	kd, _ := sch.GetMapDescriptors()
    48  
    49  	keyBuilder := val.NewTupleBuilder(kd)
    50  
    51  	qual := dStats.Qualifier()
    52  	pool := statsMap.NodeStore().Pool()
    53  
    54  	// delete previous entries for this index -> (db, table, index, pos)
    55  	keyBuilder.PutString(0, qual.Database)
    56  	keyBuilder.PutString(1, qual.Table())
    57  	keyBuilder.PutString(2, qual.Index())
    58  	keyBuilder.PutInt64(3, 0)
    59  	firstKey := keyBuilder.Build(pool)
    60  	keyBuilder.PutString(0, qual.Database)
    61  	keyBuilder.PutString(1, qual.Table())
    62  	keyBuilder.PutString(2, qual.Index())
    63  	keyBuilder.PutInt64(3, maxBucketFanout+1)
    64  	maxKey := keyBuilder.Build(pool)
    65  
    66  	// there is a limit on the number of buckets for a given index, iter
    67  	// will terminate before maxBucketFanout
    68  	iter, err := statsMap.IterKeyRange(ctx, firstKey, maxKey)
    69  	if err != nil {
    70  		return err
    71  	}
    72  
    73  	for {
    74  		k, _, err := iter.Next(ctx)
    75  		if errors.Is(err, io.EOF) {
    76  			break
    77  		} else if err != nil {
    78  			return err
    79  		}
    80  		err = statsMap.Put(ctx, k, nil)
    81  		if err != nil {
    82  			return err
    83  		}
    84  	}
    85  	return nil
    86  }
    87  
    88  func putIndexRows(ctx context.Context, statsMap *prolly.MutableMap, dStats *statspro.DoltStats) error {
    89  	sch := schema.StatsTableDoltSchema
    90  	kd, vd := sch.GetMapDescriptors()
    91  
    92  	keyBuilder := val.NewTupleBuilder(kd)
    93  	valueBuilder := val.NewTupleBuilder(vd)
    94  
    95  	qual := dStats.Qualifier()
    96  	pool := statsMap.NodeStore().Pool()
    97  
    98  	// now add new buckets
    99  	typesB := strings.Builder{}
   100  	sep := ""
   101  	for _, t := range dStats.Statistic.Typs {
   102  		typesB.WriteString(sep + t.String())
   103  		sep = ","
   104  	}
   105  	typesStr := typesB.String()
   106  
   107  	var pos int64
   108  	for _, h := range dStats.Hist {
   109  		var upperBoundElems []string
   110  		for _, v := range h.UpperBound() {
   111  			upperBoundElems = append(upperBoundElems, fmt.Sprintf("%v", v))
   112  		}
   113  
   114  		keyBuilder.PutString(0, qual.Database)
   115  		keyBuilder.PutString(1, qual.Tab)
   116  		keyBuilder.PutString(2, qual.Idx)
   117  		keyBuilder.PutInt64(3, pos)
   118  
   119  		valueBuilder.PutInt64(0, schema.StatsVersion)
   120  		valueBuilder.PutString(1, statspro.DoltBucketChunk(h).String())
   121  		valueBuilder.PutInt64(2, int64(h.RowCount()))
   122  		valueBuilder.PutInt64(3, int64(h.DistinctCount()))
   123  		valueBuilder.PutInt64(4, int64(h.NullCount()))
   124  		valueBuilder.PutString(5, strings.Join(dStats.Columns(), ","))
   125  		valueBuilder.PutString(6, typesStr)
   126  		valueBuilder.PutString(7, stats.StringifyKey(h.UpperBound(), dStats.Statistic.Typs))
   127  		valueBuilder.PutInt64(8, int64(h.BoundCount()))
   128  		valueBuilder.PutDatetime(9, statspro.DoltBucketCreated(h))
   129  		for i, r := range h.Mcvs() {
   130  			valueBuilder.PutString(10+i, stats.StringifyKey(r, dStats.Statistic.Typs))
   131  		}
   132  		var mcvCntsRow sql.Row
   133  		for _, v := range h.McvCounts() {
   134  			mcvCntsRow = append(mcvCntsRow, int(v))
   135  		}
   136  		valueBuilder.PutString(14, stats.StringifyKey(mcvCntsRow, dStats.Statistic.Typs))
   137  
   138  		key := keyBuilder.Build(pool)
   139  		value := valueBuilder.Build(pool)
   140  		statsMap.Put(ctx, key, value)
   141  		pos++
   142  	}
   143  	return nil
   144  }