github.com/dolthub/go-mysql-server@v0.18.0/sql/stats/statistic.go (about)

     1  // Copyright 2023 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package stats
    16  
    17  // This is a presentation layer package. Custom implementations converge here
    18  // as a conversion between SQL inputs/outputs. These do not add anything to the
    19  // interfaces defined in |sql|, but the separation is necessary for import conflicts.
    20  
    21  import (
    22  	"fmt"
    23  	"regexp"
    24  	"time"
    25  
    26  	"github.com/dolthub/vitess/go/vt/sqlparser"
    27  
    28  	"github.com/dolthub/go-mysql-server/sql"
    29  	"github.com/dolthub/go-mysql-server/sql/types"
    30  )
    31  
    32  func NewStatistic(rowCount, distinctCount, nullCount, avgSize uint64, createdAt time.Time, qualifier sql.StatQualifier, columns []string, types []sql.Type, histogram []*Bucket, class sql.IndexClass, lowerBound sql.Row) *Statistic {
    33  	return &Statistic{
    34  		RowCnt:      rowCount,
    35  		DistinctCnt: distinctCount,
    36  		NullCnt:     nullCount,
    37  		AvgRowSize:  avgSize,
    38  		Created:     createdAt,
    39  		Qual:        qualifier,
    40  		Cols:        columns,
    41  		Typs:        types,
    42  		Hist:        histogram,
    43  		IdxClass:    uint8(class),
    44  		LowerBnd:    lowerBound,
    45  	}
    46  }
    47  
// Statistic is this package's concrete implementation of sql.Statistic.
//
// The exported fields carry JSON tags and back the accessor methods of the
// same meaning (RowCnt -> RowCount, DistinctCnt -> DistinctCount, NullCnt ->
// NullCount, AvgRowSize -> AvgSize, Created -> CreatedAt, Qual -> Qualifier,
// Cols -> Columns, Typs -> Types, Hist -> Histogram, IdxClass -> IndexClass,
// LowerBnd -> LowerBound). Typs is tagged `json:"-"` and is therefore left
// out of the struct's JSON encoding. IdxClass holds a sql.IndexClass as a
// uint8.
//
// fds and colSet are unexported and are only reachable through
// FuncDeps/WithFuncDeps and ColSet/WithColSet respectively.
type Statistic struct {
	RowCnt      uint64            `json:"row_count"`
	DistinctCnt uint64            `json:"distinct_count"`
	NullCnt     uint64            `json:"null_count"`
	AvgRowSize  uint64            `json:"avg_size"`
	Created     time.Time         `json:"created_at"`
	Qual        sql.StatQualifier `json:"qualifier"`
	Cols        []string          `json:"columns"`
	Typs        []sql.Type        `json:"-"`
	Hist        []*Bucket         `json:"buckets"`
	IdxClass    uint8             `json:"index_class"`
	LowerBnd    sql.Row           `json:"lower_bound"`
	fds         *sql.FuncDepSet   `json:"-"`
	colSet      sql.ColSet        `json:"-"`
}
    63  
// Compile-time assertions that *Statistic satisfies the sql.JSONWrapper and
// sql.Statistic interfaces.
var _ sql.JSONWrapper = (*Statistic)(nil)
var _ sql.Statistic = (*Statistic)(nil)
    66  
// FuncDeps returns the functional dependency set attached to this statistic.
// It is nil unless one was attached via WithFuncDeps.
func (s *Statistic) FuncDeps() *sql.FuncDepSet {
	return s.fds
}
    70  
    71  func (s *Statistic) WithFuncDeps(fds *sql.FuncDepSet) sql.Statistic {
    72  	ret := *s
    73  	ret.fds = fds
    74  	return &ret
    75  }
    76  
// LowerBound returns the lower bound row stored on this statistic.
func (s *Statistic) LowerBound() sql.Row {
	return s.LowerBnd
}
    80  
// ColSet returns the column set attached to this statistic. It is the zero
// value unless one was attached via WithColSet.
func (s *Statistic) ColSet() sql.ColSet {
	return s.colSet
}
    84  
    85  func (s *Statistic) WithColSet(cols sql.ColSet) sql.Statistic {
    86  	ret := *s
    87  	ret.colSet = cols
    88  	return &ret
    89  }
    90  
// SetTypes replaces the statistic's column types in place. Unlike the With*
// methods, this mutates the receiver. The slice is stored without copying.
func (s *Statistic) SetTypes(t []sql.Type) {
	s.Typs = t
}
    94  
// SetColumns replaces the statistic's column names in place. Unlike the With*
// methods, this mutates the receiver. The slice is stored without copying.
func (s *Statistic) SetColumns(c []string) {
	s.Cols = c
}
    98  
// SetQualifier replaces the statistic's qualifier in place. Unlike the With*
// methods, this mutates the receiver.
func (s *Statistic) SetQualifier(q sql.StatQualifier) {
	s.Qual = q
}
   102  
// RowCount returns the stored row count.
func (s *Statistic) RowCount() uint64 {
	return s.RowCnt
}
   106  
// DistinctCount returns the stored distinct count.
func (s *Statistic) DistinctCount() uint64 {
	return s.DistinctCnt
}
   110  
// NullCount returns the stored null count.
func (s *Statistic) NullCount() uint64 {
	return s.NullCnt
}
   114  
// AvgSize returns the stored average size (the AvgRowSize field).
func (s *Statistic) AvgSize() uint64 {
	return s.AvgRowSize
}
   118  
// CreatedAt returns the creation timestamp stored on this statistic.
func (s *Statistic) CreatedAt() time.Time {
	return s.Created
}
   122  
// Columns returns the stored column names. The returned slice is the
// statistic's own slice, not a copy.
func (s *Statistic) Columns() []string {
	return s.Cols
}
   126  
// Qualifier returns the qualifier stored on this statistic.
func (s *Statistic) Qualifier() sql.StatQualifier {
	return s.Qual
}
   130  
// Types returns the stored column types. The returned slice is the
// statistic's own slice, not a copy.
func (s *Statistic) Types() []sql.Type {
	return s.Typs
}
   134  
   135  func (s *Statistic) Histogram() sql.Histogram {
   136  	buckets := make([]sql.HistogramBucket, len(s.Hist))
   137  	for i, b := range s.Hist {
   138  		buckets[i] = b
   139  	}
   140  	return buckets
   141  }
   142  
   143  func (s *Statistic) WithDistinctCount(i uint64) sql.Statistic {
   144  	ret := *s
   145  	ret.DistinctCnt = i
   146  	return &ret
   147  }
   148  
   149  func (s *Statistic) WithRowCount(i uint64) sql.Statistic {
   150  	ret := *s
   151  	ret.RowCnt = i
   152  	return &ret
   153  }
   154  
   155  func (s *Statistic) WithNullCount(i uint64) sql.Statistic {
   156  	ret := *s
   157  	ret.NullCnt = i
   158  	return &ret
   159  }
   160  
   161  func (s *Statistic) WithAvgSize(i uint64) sql.Statistic {
   162  	ret := *s
   163  	ret.AvgRowSize = i
   164  	return &ret
   165  }
   166  
   167  func (s *Statistic) WithLowerBound(r sql.Row) sql.Statistic {
   168  	ret := *s
   169  	ret.LowerBnd = r
   170  	return &ret
   171  }
   172  
   173  func (s *Statistic) WithHistogram(h sql.Histogram) (sql.Statistic, error) {
   174  	ret := *s
   175  	ret.Hist = nil
   176  	for _, b := range h {
   177  		sqlB, ok := b.(*Bucket)
   178  		if !ok {
   179  			return nil, fmt.Errorf("invalid bucket type: %T", b)
   180  		}
   181  		ret.Hist = append(ret.Hist, sqlB)
   182  	}
   183  	return &ret, nil
   184  }
   185  
// IndexClass returns the stored index class, converting the uint8 held in
// IdxClass back to sql.IndexClass.
func (s *Statistic) IndexClass() sql.IndexClass {
	return sql.IndexClass(s.IdxClass)
}
   189  
   190  func (s *Statistic) ToInterface() interface{} {
   191  	typs := make([]string, len(s.Typs))
   192  	for i, t := range s.Typs {
   193  		typs[i] = t.String()
   194  	}
   195  	return map[string]interface{}{
   196  		"statistic": map[string]interface{}{
   197  			"row_count":      s.RowCount(),
   198  			"null_count":     s.RowCount(),
   199  			"distinct_count": s.DistinctCount(),
   200  			"avg_size":       s.AvgSize(),
   201  			"created_at":     s.CreatedAt(),
   202  			"qualifier":      s.Qualifier().String(),
   203  			"columns":        s.Columns(),
   204  			"types:":         typs,
   205  			"buckets":        s.Histogram().ToInterface(),
   206  		},
   207  	}
   208  }
   209  
   210  func ParseTypeStrings(typs []string) ([]sql.Type, error) {
   211  	if len(typs) == 0 {
   212  		return nil, nil
   213  	}
   214  	ret := make([]sql.Type, len(typs))
   215  	var err error
   216  	typRegex := regexp.MustCompile("([a-z]+)\\((\\d+)\\)")
   217  	for i, typ := range typs {
   218  		typMatch := typRegex.FindStringSubmatch(typ)
   219  		colType := &sqlparser.ColumnType{}
   220  		if typMatch == nil {
   221  			colType.Type = typ
   222  		} else {
   223  			colType.Type = typMatch[1]
   224  			if len(typMatch) > 2 {
   225  				colType.Length = &sqlparser.SQLVal{Val: []byte(typMatch[2]), Type: sqlparser.IntVal}
   226  			}
   227  		}
   228  		ret[i], err = types.ColumnTypeToType(colType)
   229  		if err != nil {
   230  			return nil, fmt.Errorf("failed to parse histogram type: %s", typMatch)
   231  		}
   232  	}
   233  	return ret, nil
   234  }
   235  
   236  func NewHistogramBucket(rowCount, distinctCount, nullCount, boundCount uint64, boundValue sql.Row, mcvCounts []uint64, mcvs []sql.Row) *Bucket {
   237  	return &Bucket{
   238  		RowCnt:      rowCount,
   239  		DistinctCnt: distinctCount,
   240  		NullCnt:     nullCount,
   241  		McvsCnt:     mcvCounts,
   242  		BoundCnt:    boundCount,
   243  		BoundVal:    boundValue,
   244  		McvVals:     mcvs,
   245  	}
   246  }
   247  
// Bucket is this package's concrete implementation of sql.HistogramBucket.
//
// The exported fields carry JSON tags and back the accessor methods of the
// same meaning (RowCnt -> RowCount, DistinctCnt -> DistinctCount, NullCnt ->
// NullCount, BoundCnt -> BoundCount, BoundVal -> UpperBound, McvsCnt ->
// McvCounts, McvVals -> Mcvs). BoundVal is serialized as "upper_bound".
//
// NOTE(review): "mcv" presumably stands for "most common values", with
// McvsCnt[i] being the count for McvVals[i] -- confirm against the producer
// of these buckets.
type Bucket struct {
	RowCnt      uint64    `json:"row_count"`
	DistinctCnt uint64    `json:"distinct_count"`
	NullCnt     uint64    `json:"null_count"`
	McvsCnt     []uint64  `json:"mcv_counts"`
	BoundCnt    uint64    `json:"bound_count"`
	BoundVal    sql.Row   `json:"upper_bound"`
	McvVals     []sql.Row `json:"mcvs"`
}
   257  
// Compile-time assertion that *Bucket satisfies sql.HistogramBucket.
var _ sql.HistogramBucket = (*Bucket)(nil)
   259  
// RowCount returns the bucket's stored row count.
func (b Bucket) RowCount() uint64 {
	return b.RowCnt
}
   263  
// DistinctCount returns the bucket's stored distinct count.
func (b Bucket) DistinctCount() uint64 {
	return b.DistinctCnt
}
   267  
// NullCount returns the bucket's stored null count.
func (b Bucket) NullCount() uint64 {
	return b.NullCnt
}
   271  
// BoundCount returns the bucket's stored bound count.
func (b Bucket) BoundCount() uint64 {
	return b.BoundCnt
}
   275  
// UpperBound returns the bucket's bound value (the BoundVal field, serialized
// as "upper_bound").
func (b Bucket) UpperBound() sql.Row {
	return b.BoundVal
}
   279  
// McvCounts returns the bucket's stored MCV counts. The returned slice is the
// bucket's own slice, not a copy.
func (b Bucket) McvCounts() []uint64 {
	return b.McvsCnt
}
   283  
// Mcvs returns the bucket's stored MCV rows. The returned slice is the
// bucket's own slice, not a copy.
func (b Bucket) Mcvs() []sql.Row {
	return b.McvVals
}