github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/sqle/statspro/dolt_stats.go (about)

     1  // Copyright 2024 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package statspro
    16  
    17  import (
    18  	"fmt"
    19  	"sync"
    20  	"time"
    21  
    22  	"github.com/dolthub/go-mysql-server/sql"
    23  	"github.com/dolthub/go-mysql-server/sql/stats"
    24  
    25  	"github.com/dolthub/dolt/go/store/hash"
    26  )
    27  
// DoltStats wraps a *stats.Statistic together with chunk bookkeeping
// (Chunks, Active) and the histogram (Hist) that Histogram returns.
// The With* methods copy this struct by value, so a copy shares the mutex
// pointer, the Chunks and Hist backing arrays, and the Active map with the
// value it was copied from.
type DoltStats struct {
	// Statistic is the wrapped statistic that the accessor methods on
	// DoltStats delegate to; mu is locked by UpdateActive while it swaps in
	// a freshly built Active map.
	Statistic *stats.Statistic
	mu        *sync.Mutex
	// Chunks is a list of addresses for the histogram fanout level
	Chunks []hash.Hash
	// Active maps a chunk/bucket address to its position in
	// the histogram. 1-indexed to differentiate from an empty
	// field on disk
	// NOTE(review): UpdateActive stores the zero-based index of each entry
	// in Chunks, which does not match the 1-indexed description above;
	// confirm which convention is intended.
	Active map[hash.Hash]int
	Hist   sql.Histogram
}
    39  
// Compile-time check that *DoltStats implements sql.Statistic.
var _ sql.Statistic = (*DoltStats)(nil)
    41  
    42  func (s *DoltStats) WithColSet(set sql.ColSet) sql.Statistic {
    43  	ret := *s
    44  	ret.Statistic = ret.Statistic.WithColSet(set).(*stats.Statistic)
    45  	return &ret
    46  }
    47  
    48  func (s *DoltStats) WithFuncDeps(set *sql.FuncDepSet) sql.Statistic {
    49  	ret := *s
    50  	ret.Statistic = ret.Statistic.WithFuncDeps(set).(*stats.Statistic)
    51  	return &ret
    52  }
    53  
    54  func (s *DoltStats) WithDistinctCount(u uint64) sql.Statistic {
    55  	ret := *s
    56  	ret.Statistic = ret.Statistic.WithDistinctCount(u).(*stats.Statistic)
    57  	return &ret
    58  }
    59  
    60  func (s *DoltStats) WithRowCount(u uint64) sql.Statistic {
    61  	ret := *s
    62  	ret.Statistic = ret.Statistic.WithRowCount(u).(*stats.Statistic)
    63  	return &ret
    64  }
    65  
    66  func (s *DoltStats) WithNullCount(u uint64) sql.Statistic {
    67  	ret := *s
    68  	ret.Statistic = ret.Statistic.WithNullCount(u).(*stats.Statistic)
    69  	return &ret
    70  }
    71  
    72  func (s *DoltStats) WithAvgSize(u uint64) sql.Statistic {
    73  	ret := *s
    74  	ret.Statistic = ret.Statistic.WithAvgSize(u).(*stats.Statistic)
    75  	return &ret
    76  }
    77  
    78  func (s *DoltStats) WithLowerBound(row sql.Row) sql.Statistic {
    79  	ret := *s
    80  	ret.Statistic = ret.Statistic.WithLowerBound(row).(*stats.Statistic)
    81  	return &ret
    82  }
    83  
// RowCount returns the row count reported by the wrapped Statistic.
func (s *DoltStats) RowCount() uint64 {
	return s.Statistic.RowCount()
}
    87  
// DistinctCount returns the distinct count reported by the wrapped Statistic.
func (s *DoltStats) DistinctCount() uint64 {
	return s.Statistic.DistinctCount()
}
    91  
    92  func (s *DoltStats) NullCount() uint64 {
    93  	return s.Statistic.NullCount()
    94  
    95  }
    96  
    97  func (s *DoltStats) AvgSize() uint64 {
    98  	return s.Statistic.AvgSize()
    99  
   100  }
   101  
   102  func (s *DoltStats) CreatedAt() time.Time {
   103  	return s.Statistic.CreatedAt()
   104  
   105  }
   106  
// Columns returns the column names reported by the wrapped Statistic.
func (s *DoltStats) Columns() []string {
	return s.Statistic.Columns()
}
   110  
// Types returns the column types reported by the wrapped Statistic.
func (s *DoltStats) Types() []sql.Type {
	return s.Statistic.Types()
}
   114  
// Qualifier returns the sql.StatQualifier reported by the wrapped Statistic.
func (s *DoltStats) Qualifier() sql.StatQualifier {
	return s.Statistic.Qualifier()
}
   118  
// IndexClass returns the sql.IndexClass reported by the wrapped Statistic.
func (s *DoltStats) IndexClass() sql.IndexClass {
	return s.Statistic.IndexClass()
}
   122  
// FuncDeps returns the functional dependency set reported by the wrapped
// Statistic.
func (s *DoltStats) FuncDeps() *sql.FuncDepSet {
	return s.Statistic.FuncDeps()
}
   126  
// ColSet returns the column set reported by the wrapped Statistic.
func (s *DoltStats) ColSet() sql.ColSet {
	return s.Statistic.ColSet()
}
   130  
// LowerBound returns the lower bound row reported by the wrapped Statistic.
func (s *DoltStats) LowerBound() sql.Row {
	return s.Statistic.LowerBound()
}
   134  
   135  func NewDoltStats() *DoltStats {
   136  	return &DoltStats{mu: &sync.Mutex{}, Active: make(map[hash.Hash]int), Statistic: &stats.Statistic{}}
   137  }
   138  
   139  func (s *DoltStats) ToInterface() (interface{}, error) {
   140  	statVal, err := s.Statistic.ToInterface()
   141  	if err != nil {
   142  		return nil, err
   143  	}
   144  	ret := statVal.(map[string]interface{})
   145  
   146  	var hist sql.Histogram
   147  	for _, b := range s.Hist {
   148  		hist = append(hist, b)
   149  	}
   150  	histVal, err := hist.ToInterface()
   151  	if err != nil {
   152  		return nil, err
   153  	}
   154  	ret["statistic"].(map[string]interface{})["buckets"] = histVal
   155  	return ret, nil
   156  }
   157  
   158  func (s *DoltStats) WithHistogram(h sql.Histogram) (sql.Statistic, error) {
   159  	ret := *s
   160  	ret.Hist = nil
   161  	for _, b := range h {
   162  		doltB, ok := b.(DoltBucket)
   163  		if !ok {
   164  			return nil, fmt.Errorf("invalid bucket type: %T", b)
   165  		}
   166  		ret.Hist = append(ret.Hist, doltB)
   167  	}
   168  	return &ret, nil
   169  }
   170  
// Histogram returns s.Hist itself, not a copy.
func (s *DoltStats) Histogram() sql.Histogram {
	return s.Hist
}
   174  
   175  func DoltStatsFromSql(stat sql.Statistic) (*DoltStats, error) {
   176  	hist, err := DoltHistFromSql(stat.Histogram(), stat.Types())
   177  	if err != nil {
   178  		return nil, err
   179  	}
   180  	ret := &DoltStats{
   181  		mu:        &sync.Mutex{},
   182  		Hist:      hist,
   183  		Statistic: stats.NewStatistic(stat.RowCount(), stat.DistinctCount(), stat.NullCount(), stat.AvgSize(), stat.CreatedAt(), stat.Qualifier(), stat.Columns(), stat.Types(), nil, stat.IndexClass(), stat.LowerBound()),
   184  		Active:    make(map[hash.Hash]int),
   185  	}
   186  	ret.Statistic.Fds = stat.FuncDeps()
   187  	ret.Statistic.Colset = stat.ColSet()
   188  	return ret, nil
   189  }
   190  
   191  func (s *DoltStats) UpdateActive() {
   192  	s.mu.Lock()
   193  	defer s.mu.Unlock()
   194  	newActive := make(map[hash.Hash]int)
   195  	for i, hash := range s.Chunks {
   196  		newActive[hash] = i
   197  	}
   198  	s.Active = newActive
   199  }
   200  
// DoltHistogram is a slice of DoltBucket values.
type DoltHistogram []DoltBucket
   202  
// DoltBucket is a histogram bucket that embeds a *stats.Bucket and adds a
// hash and a timestamp, which DoltBucketChunk and DoltBucketCreated read
// back.
// NOTE(review): Chunk is presumably the address of the chunk the bucket was
// built from and Created the time the bucket was built — confirm against
// the code that populates them.
type DoltBucket struct {
	*stats.Bucket
	Chunk   hash.Hash
	Created time.Time
}
   208  
   209  func DoltBucketChunk(b sql.HistogramBucket) hash.Hash {
   210  	return b.(DoltBucket).Chunk
   211  }
   212  
   213  func DoltBucketCreated(b sql.HistogramBucket) time.Time {
   214  	return b.(DoltBucket).Created
   215  }
   216  
// Compile-time check that *DoltBucket implements sql.HistogramBucket.
var _ sql.HistogramBucket = (*DoltBucket)(nil)
   218  
   219  func DoltHistFromSql(hist sql.Histogram, types []sql.Type) (sql.Histogram, error) {
   220  	ret := make(sql.Histogram, len(hist))
   221  	var err error
   222  	for i, b := range hist {
   223  		upperBound := make(sql.Row, len(b.UpperBound()))
   224  		for i, v := range b.UpperBound() {
   225  			upperBound[i], _, err = types[i].Convert(v)
   226  			if err != nil {
   227  				return nil, fmt.Errorf("failed to convert %v to type %s", v, types[i].String())
   228  			}
   229  		}
   230  		mcvs := make([]sql.Row, len(b.Mcvs()))
   231  		for i, mcv := range b.Mcvs() {
   232  			for _, v := range mcv {
   233  				conv, _, err := types[i].Convert(v)
   234  				if err != nil {
   235  					return nil, fmt.Errorf("failed to convert %v to type %s", v, types[i].String())
   236  				}
   237  				mcvs[i] = append(mcvs[i], conv)
   238  			}
   239  		}
   240  		ret[i] = DoltBucket{
   241  			Bucket: stats.NewHistogramBucket(b.RowCount(), b.DistinctCount(), b.NullCount(), b.BoundCount(), upperBound, b.McvCounts(), mcvs),
   242  		}
   243  	}
   244  	return ret, nil
   245  }