github.com/dolthub/go-mysql-server@v0.18.0/sql/statistics.go (about)

     1  // Copyright 2022 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package sql
    16  
    17  import (
    18  	"fmt"
    19  	"strings"
    20  	"time"
    21  )
    22  
    23  // StatisticsTable is a table that can provide information about its number of rows and other facts to improve query
    24  // planning performance.
    25  type StatisticsTable interface {
    26  	Table
    27  	// DataLength returns the length of the data file (varies by engine).
    28  	DataLength(ctx *Context) (uint64, error)
    29  	// RowCount returns the row count for this table and whether the count is exact
    30  	RowCount(ctx *Context) (uint64, bool, error)
    31  }
    32  
    33  // StatsProvider is a catalog extension for databases that can
    34  // build and provide index statistics.
    35  type StatsProvider interface {
    36  	// GetTableStats returns all statistics for the table
    37  	GetTableStats(ctx *Context, db, table string) ([]Statistic, error)
    38  	// RefreshTableStats updates all statistics associated with a given table
    39  	RefreshTableStats(ctx *Context, table Table, db string) error
    40  	// SetStats updates or overwrites a set of table statistics
    41  	SetStats(ctx *Context, stats Statistic) error
    42  	// GetStats fetches a set of statistics for a set of table columns
    43  	GetStats(ctx *Context, qual StatQualifier, cols []string) (Statistic, bool)
    44  	// DropStats deletes a set of column statistics
    45  	DropStats(ctx *Context, qual StatQualifier, cols []string) error
    46  	// DropAllStats deletes all database statistics
    47  	DropDbStats(ctx *Context, db string, flush bool) error
    48  	// RowCount returns the number of rows in a table
    49  	RowCount(ctx *Context, db, table string) (uint64, error)
    50  	// DataLength returns the estimated size of each row in the table
    51  	DataLength(ctx *Context, db, table string) (uint64, error)
    52  }
    53  
    54  type IndexClass uint8
    55  
    56  const (
    57  	IndexClassDefault = iota
    58  	IndexClassSpatial
    59  	IndexClassFulltext
    60  )
    61  
    62  // Statistic is the top-level interface for accessing cardinality and
    63  // costing estimates for an index prefix.
    64  type Statistic interface {
    65  	JSONWrapper
    66  	MutableStatistic
    67  	RowCount() uint64
    68  	DistinctCount() uint64
    69  	NullCount() uint64
    70  	AvgSize() uint64
    71  	CreatedAt() time.Time
    72  	Columns() []string
    73  	Types() []Type
    74  	Qualifier() StatQualifier
    75  	Histogram() Histogram
    76  	IndexClass() IndexClass
    77  	FuncDeps() *FuncDepSet
    78  	ColSet() ColSet
    79  	LowerBound() Row
    80  }
    81  
    82  type MutableStatistic interface {
    83  	WithColSet(ColSet) Statistic
    84  	WithFuncDeps(*FuncDepSet) Statistic
    85  	WithHistogram(Histogram) (Statistic, error)
    86  	WithDistinctCount(uint64) Statistic
    87  	WithRowCount(uint64) Statistic
    88  	WithNullCount(uint64) Statistic
    89  	WithAvgSize(uint64) Statistic
    90  	WithLowerBound(Row) Statistic
    91  }
    92  
    93  func NewQualifierFromString(q string) (StatQualifier, error) {
    94  	parts := strings.Split(q, ".")
    95  	if len(parts) < 3 {
    96  		return StatQualifier{}, fmt.Errorf("invalid qualifier string: '%s', expected '<database>.<table>.<index>'", q)
    97  	}
    98  	return StatQualifier{Database: parts[0], Tab: parts[1], Idx: parts[2]}, nil
    99  }
   100  
   101  func NewStatQualifier(db, table, index string) StatQualifier {
   102  	return StatQualifier{Database: db, Tab: table, Idx: index}
   103  }
   104  
   105  // StatQualifier is the namespace hierarchy for a given statistic.
   106  // The qualifier and set of columns completely describes a unique stat.
   107  type StatQualifier struct {
   108  	Database string `json:"database"`
   109  	Tab      string `json:"table"`
   110  	Idx      string `json:"index"`
   111  }
   112  
   113  func (q StatQualifier) String() string {
   114  	if q.Idx != "" {
   115  		return fmt.Sprintf("%s.%s.%s", q.Database, q.Tab, q.Idx)
   116  	}
   117  	return fmt.Sprintf("%s.%s", q.Database, q.Tab)
   118  }
   119  
   120  func (q StatQualifier) Empty() bool {
   121  	return q.Idx == "" || q.Tab == "" || q.Database == ""
   122  }
   123  
   124  func (q StatQualifier) Db() string {
   125  	return q.Database
   126  }
   127  
   128  func (q StatQualifier) Table() string {
   129  	return q.Tab
   130  }
   131  
   132  func (q StatQualifier) Index() string {
   133  	return q.Idx
   134  }
   135  
   136  // Histogram is a collection of non-overlapping buckets that
   137  // estimate the costing statistics for an index prefix.
   138  // Note that a non-unique key can cross bucket boundaries.
   139  type Histogram []HistogramBucket
   140  
   141  func (h Histogram) IsEmpty() bool {
   142  	return len(h) == 0
   143  }
   144  
   145  func (h Histogram) ToInterface() interface{} {
   146  	ret := make([]interface{}, len(h))
   147  	for i, b := range h {
   148  		var upperBound Row
   149  		for _, v := range b.UpperBound() {
   150  			upperBound = append(upperBound, v)
   151  		}
   152  		mcvs := make([]Row, len(b.Mcvs()))
   153  		for i, mcv := range b.Mcvs() {
   154  			var row Row
   155  			for _, v := range mcv {
   156  				row = append(row, v)
   157  			}
   158  			mcvs[i] = row
   159  		}
   160  		ret[i] = map[string]interface{}{
   161  			"row_count":      b.RowCount(),
   162  			"null_count":     b.NullCount(),
   163  			"distinct_count": b.DistinctCount(),
   164  			"bound_count":    b.BoundCount(),
   165  			"mcv_counts":     b.McvCounts(),
   166  			"mcvs":           mcvs,
   167  			"upper_bound":    upperBound,
   168  		}
   169  	}
   170  	return ret
   171  }
   172  
   173  func (h Histogram) DebugString() string {
   174  	var bounds []string
   175  	var cnts []int
   176  	var allCnt int
   177  	for _, bucket := range h {
   178  		cnt := int(bucket.RowCount())
   179  		var key []string
   180  		for _, v := range bucket.UpperBound() {
   181  			key = append(key, fmt.Sprintf("%v", v))
   182  		}
   183  		bounds = append(bounds, strings.Join(key, ","))
   184  		allCnt += cnt
   185  		cnts = append(cnts, cnt)
   186  	}
   187  
   188  	flatten := 50 / float64(allCnt)
   189  	b := strings.Builder{}
   190  	b.WriteString("histogram:\n")
   191  	for j, bound := range bounds {
   192  		b.WriteString(bound + ": ")
   193  		for i := 0; i < int(float64(cnts[j])*flatten); i++ {
   194  			b.WriteString("*")
   195  		}
   196  		fmt.Fprintf(&b, "(%d)\n", cnts[j])
   197  	}
   198  	return b.String()
   199  }
   200  
   201  // HistogramBucket contains statistics for a fragment of an
   202  // index's keyspace.
   203  type HistogramBucket interface {
   204  	RowCount() uint64
   205  	DistinctCount() uint64
   206  	NullCount() uint64
   207  	BoundCount() uint64
   208  	UpperBound() Row
   209  	McvCounts() []uint64
   210  	Mcvs() []Row
   211  }
   212  
   213  // JSONWrapper is an integrator specific implementation of a JSON field value.
   214  // The query engine can utilize these optimized access methods improve performance
   215  // by minimizing the need to unmarshall a JSONWrapper into a JSONDocument.
   216  type JSONWrapper interface {
   217  	// ToInterface converts a JSONWrapper to an interface{} of simple types
   218  	ToInterface() interface{}
   219  }