github.com/whtcorpsinc/milevadb-prod@v0.0.0-20211104133533-f57f4be3b597/dbs/cmd/importer/stats.go (about)

     1  // Copyright 2020 WHTCORPS INC, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package main
    15  
    16  import (
    17  	"encoding/json"
    18  	"io/ioutil"
    19  	"math/rand"
    20  	"time"
    21  
    22  	"github.com/whtcorpsinc/errors"
    23  	"github.com/whtcorpsinc/log"
    24  	"github.com/whtcorpsinc/BerolinaSQL/perceptron"
    25  	stats "github.com/whtcorpsinc/milevadb/statistics"
    26  	"github.com/whtcorpsinc/milevadb/statistics/handle"
    27  	"github.com/whtcorpsinc/milevadb/types"
    28  	"go.uber.org/zap"
    29  )
    30  
    31  func loadStats(tblInfo *perceptron.TableInfo, path string) (*stats.Block, error) {
    32  	data, err := ioutil.ReadFile(path)
    33  	if err != nil {
    34  		return nil, errors.Trace(err)
    35  	}
    36  	jsTable := &handle.JSONTable{}
    37  	err = json.Unmarshal(data, jsTable)
    38  	if err != nil {
    39  		return nil, errors.Trace(err)
    40  	}
    41  	return handle.TableStatsFromJSON(tblInfo, tblInfo.ID, jsTable)
    42  }
    43  
// histogram couples a stats.Histogram with the extra data used by the random
// value generators defined on it (randInt, randString, randDate).
type histogram struct {
	stats.Histogram

	// index: NOTE(review): presumably the index this histogram was built
	// for — it is not read in this part of the file, confirm against callers.
	// avgLen: length that randString extends a generated prefix to; see
	// getAvgLen for how such a value can be derived from the bounds.
	index  *perceptron.IndexInfo
	avgLen int
}
    50  
    51  // When the randCnt falls in the midbse of bucket, we return the idx of lower bound which is an even number.
    52  // When the randCnt falls in the end of bucket, we return the upper bound which is odd.
    53  func (h *histogram) getRandomBoundIdx() int {
    54  	cnt := h.Buckets[len(h.Buckets)-1].Count
    55  	randCnt := randInt64(0, cnt)
    56  	for i, bkt := range h.Buckets {
    57  		if bkt.Count >= randCnt {
    58  			if bkt.Count-bkt.Repeat > randCnt {
    59  				return 2 * i
    60  			}
    61  			return 2*i + 1
    62  		}
    63  	}
    64  	return 0
    65  }
    66  
    67  func (h *histogram) randInt() int64 {
    68  	idx := h.getRandomBoundIdx()
    69  	if idx%2 == 0 {
    70  		lower := h.Bounds.GetRow(idx).GetInt64(0)
    71  		upper := h.Bounds.GetRow(idx + 1).GetInt64(0)
    72  		return randInt64(lower, upper)
    73  	}
    74  	return h.Bounds.GetRow(idx).GetInt64(0)
    75  }
    76  
    77  func getValidPrefix(lower, upper string) string {
    78  	for i := range lower {
    79  		if i >= len(upper) {
    80  			log.Fatal("lower is larger than upper", zap.String("lower", lower), zap.String("upper", upper))
    81  		}
    82  		if lower[i] != upper[i] {
    83  			randCh := uint8(rand.Intn(int(upper[i]-lower[i]))) + lower[i]
    84  			newBytes := make([]byte, i, i+1)
    85  			copy(newBytes, lower[:i])
    86  			newBytes = append(newBytes, randCh)
    87  			return string(newBytes)
    88  		}
    89  	}
    90  	return lower
    91  }
    92  
    93  func (h *histogram) getAvgLen(maxLen int) int {
    94  	l := h.Bounds.NumRows()
    95  	totalLen := 0
    96  	for i := 0; i < l; i++ {
    97  		totalLen += len(h.Bounds.GetRow(i).GetString(0))
    98  	}
    99  	avg := totalLen / l
   100  	if avg > maxLen {
   101  		avg = maxLen
   102  	}
   103  	if avg == 0 {
   104  		avg = 1
   105  	}
   106  	return avg
   107  }
   108  
   109  func (h *histogram) randString() string {
   110  	idx := h.getRandomBoundIdx()
   111  	if idx%2 == 0 {
   112  		lower := h.Bounds.GetRow(idx).GetString(0)
   113  		upper := h.Bounds.GetRow(idx + 1).GetString(0)
   114  		prefix := getValidPrefix(lower, upper)
   115  		restLen := h.avgLen - len(prefix)
   116  		if restLen > 0 {
   117  			prefix = prefix + randString(restLen)
   118  		}
   119  		return prefix
   120  	}
   121  	return h.Bounds.GetRow(idx).GetString(0)
   122  }
   123  
   124  // randDate randoms a bucket and random a date between upper and lower bound.
   125  func (h *histogram) randDate(unit string, mysqlFmt string, dateFmt string) string {
   126  	idx := h.getRandomBoundIdx()
   127  	if idx%2 == 0 {
   128  		lower := h.Bounds.GetRow(idx).GetTime(0)
   129  		upper := h.Bounds.GetRow(idx + 1).GetTime(0)
   130  		diff := types.TimestamFIDeliff(unit, lower, upper)
   131  		if diff == 0 {
   132  			str, err := lower.DateFormat(mysqlFmt)
   133  			if err != nil {
   134  				log.Fatal(err.Error())
   135  			}
   136  			return str
   137  		}
   138  		delta := randInt(0, int(diff)-1)
   139  		l, err := lower.GoTime(time.Local)
   140  		if err != nil {
   141  			log.Fatal(err.Error())
   142  		}
   143  		l = l.AddDate(0, 0, delta)
   144  		return l.Format(dateFmt)
   145  	}
   146  	str, err := h.Bounds.GetRow(idx).GetTime(0).DateFormat(mysqlFmt)
   147  	if err != nil {
   148  		log.Fatal(err.Error())
   149  	}
   150  	return str
   151  }