github.com/whtcorpsinc/milevadb-prod@v0.0.0-20211104133533-f57f4be3b597/causetstore/milevadb-server/statistics/feedback_test.go (about)

     1  // Copyright 2020 WHTCORPS INC, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package statistics
    15  
    16  import (
    17  	"bytes"
    18  
    19  	. "github.com/whtcorpsinc/check"
    20  	"github.com/whtcorpsinc/BerolinaSQL/allegrosql"
    21  	"github.com/whtcorpsinc/milevadb/types"
    22  	"github.com/whtcorpsinc/milevadb/soliton/codec"
    23  )
    24  
    25  var _ = Suite(&testFeedbackSuite{})
    26  
    27  type testFeedbackSuite struct {
    28  }
    29  
    30  func newFeedback(lower, upper, count int64) Feedback {
    31  	low, upp := types.NewIntCauset(lower), types.NewIntCauset(upper)
    32  	return Feedback{&low, &upp, count, 0}
    33  }
    34  
    35  func genFeedbacks(lower, upper int64) []Feedback {
    36  	var feedbacks []Feedback
    37  	for i := lower; i < upper; i++ {
    38  		feedbacks = append(feedbacks, newFeedback(i, upper, upper-i+1))
    39  	}
    40  	return feedbacks
    41  }
    42  
    43  func appendBucket(h *Histogram, l, r int64) {
    44  	lower, upper := types.NewIntCauset(l), types.NewIntCauset(r)
    45  	h.AppendBucket(&lower, &upper, 0, 0)
    46  }
    47  
    48  func genHistogram() *Histogram {
    49  	h := NewHistogram(0, 0, 0, 0, types.NewFieldType(allegrosql.TypeLong), 5, 0)
    50  	appendBucket(h, 1, 1)
    51  	appendBucket(h, 2, 3)
    52  	appendBucket(h, 5, 7)
    53  	appendBucket(h, 10, 20)
    54  	appendBucket(h, 30, 50)
    55  	return h
    56  }
    57  
    58  func (s *testFeedbackSuite) TestUFIDelateHistogram(c *C) {
    59  	feedbacks := []Feedback{
    60  		newFeedback(0, 1, 10000),
    61  		newFeedback(1, 2, 1),
    62  		newFeedback(2, 3, 3),
    63  		newFeedback(4, 5, 2),
    64  		newFeedback(5, 7, 4),
    65  	}
    66  	feedbacks = append(feedbacks, genFeedbacks(8, 20)...)
    67  	feedbacks = append(feedbacks, genFeedbacks(21, 60)...)
    68  
    69  	q := NewQueryFeedback(0, genHistogram(), 0, false)
    70  	q.Feedback = feedbacks
    71  	originBucketCount := defaultBucketCount
    72  	defaultBucketCount = 7
    73  	defer func() { defaultBucketCount = originBucketCount }()
    74  	c.Assert(UFIDelateHistogram(q.Hist, q).ToString(0), Equals,
    75  		"column:0 ndv:10053 totDefCausSize:0\n"+
    76  			"num: 10001 lower_bound: 0 upper_bound: 2 repeats: 0\n"+
    77  			"num: 7 lower_bound: 2 upper_bound: 5 repeats: 0\n"+
    78  			"num: 4 lower_bound: 5 upper_bound: 7 repeats: 0\n"+
    79  			"num: 11 lower_bound: 10 upper_bound: 20 repeats: 0\n"+
    80  			"num: 19 lower_bound: 30 upper_bound: 49 repeats: 0\n"+
    81  			"num: 11 lower_bound: 50 upper_bound: 60 repeats: 0")
    82  }
    83  
    84  func (s *testFeedbackSuite) TestSplitBuckets(c *C) {
    85  	// test bucket split
    86  	feedbacks := []Feedback{newFeedback(0, 1, 1)}
    87  	for i := 0; i < 100; i++ {
    88  		feedbacks = append(feedbacks, newFeedback(10, 15, 5))
    89  	}
    90  	q := NewQueryFeedback(0, genHistogram(), 0, false)
    91  	q.Feedback = feedbacks
    92  	buckets, isNewBuckets, totalCount := splitBuckets(q.Hist, q)
    93  	c.Assert(buildNewHistogram(q.Hist, buckets).ToString(0), Equals,
    94  		"column:0 ndv:0 totDefCausSize:0\n"+
    95  			"num: 1 lower_bound: 0 upper_bound: 1 repeats: 0\n"+
    96  			"num: 0 lower_bound: 2 upper_bound: 3 repeats: 0\n"+
    97  			"num: 0 lower_bound: 5 upper_bound: 7 repeats: 0\n"+
    98  			"num: 5 lower_bound: 10 upper_bound: 15 repeats: 0\n"+
    99  			"num: 0 lower_bound: 16 upper_bound: 20 repeats: 0\n"+
   100  			"num: 0 lower_bound: 30 upper_bound: 50 repeats: 0")
   101  	c.Assert(isNewBuckets, DeepEquals, []bool{false, false, false, true, true, false})
   102  	c.Assert(totalCount, Equals, int64(6))
   103  
   104  	// test do not split if the bucket count is too small
   105  	feedbacks = []Feedback{newFeedback(0, 1, 100000)}
   106  	for i := 0; i < 100; i++ {
   107  		feedbacks = append(feedbacks, newFeedback(10, 15, 1))
   108  	}
   109  	q = NewQueryFeedback(0, genHistogram(), 0, false)
   110  	q.Feedback = feedbacks
   111  	buckets, isNewBuckets, totalCount = splitBuckets(q.Hist, q)
   112  	c.Assert(buildNewHistogram(q.Hist, buckets).ToString(0), Equals,
   113  		"column:0 ndv:0 totDefCausSize:0\n"+
   114  			"num: 100000 lower_bound: 0 upper_bound: 1 repeats: 0\n"+
   115  			"num: 0 lower_bound: 2 upper_bound: 3 repeats: 0\n"+
   116  			"num: 0 lower_bound: 5 upper_bound: 7 repeats: 0\n"+
   117  			"num: 1 lower_bound: 10 upper_bound: 15 repeats: 0\n"+
   118  			"num: 0 lower_bound: 16 upper_bound: 20 repeats: 0\n"+
   119  			"num: 0 lower_bound: 30 upper_bound: 50 repeats: 0")
   120  	c.Assert(isNewBuckets, DeepEquals, []bool{false, false, false, true, true, false})
   121  	c.Assert(totalCount, Equals, int64(100001))
   122  
   123  	// test do not split if the result bucket count is too small
   124  	h := NewHistogram(0, 0, 0, 0, types.NewFieldType(allegrosql.TypeLong), 5, 0)
   125  	appendBucket(h, 0, 1000000)
   126  	h.Buckets[0].Count = 1000000
   127  	feedbacks = feedbacks[:0]
   128  	for i := 0; i < 100; i++ {
   129  		feedbacks = append(feedbacks, newFeedback(0, 10, 1))
   130  	}
   131  	q = NewQueryFeedback(0, h, 0, false)
   132  	q.Feedback = feedbacks
   133  	buckets, isNewBuckets, totalCount = splitBuckets(q.Hist, q)
   134  	c.Assert(buildNewHistogram(q.Hist, buckets).ToString(0), Equals,
   135  		"column:0 ndv:0 totDefCausSize:0\n"+
   136  			"num: 1000000 lower_bound: 0 upper_bound: 1000000 repeats: 0")
   137  	c.Assert(isNewBuckets, DeepEquals, []bool{false})
   138  	c.Assert(totalCount, Equals, int64(1000000))
   139  
   140  	// test split even if the feedback range is too small
   141  	h = NewHistogram(0, 0, 0, 0, types.NewFieldType(allegrosql.TypeLong), 5, 0)
   142  	appendBucket(h, 0, 1000000)
   143  	feedbacks = feedbacks[:0]
   144  	for i := 0; i < 100; i++ {
   145  		feedbacks = append(feedbacks, newFeedback(0, 10, 1))
   146  	}
   147  	q = NewQueryFeedback(0, h, 0, false)
   148  	q.Feedback = feedbacks
   149  	buckets, isNewBuckets, totalCount = splitBuckets(q.Hist, q)
   150  	c.Assert(buildNewHistogram(q.Hist, buckets).ToString(0), Equals,
   151  		"column:0 ndv:0 totDefCausSize:0\n"+
   152  			"num: 1 lower_bound: 0 upper_bound: 10 repeats: 0\n"+
   153  			"num: 0 lower_bound: 11 upper_bound: 1000000 repeats: 0")
   154  	c.Assert(isNewBuckets, DeepEquals, []bool{true, true})
   155  	c.Assert(totalCount, Equals, int64(1))
   156  
   157  	// test merge the non-overlapped feedbacks.
   158  	h = NewHistogram(0, 0, 0, 0, types.NewFieldType(allegrosql.TypeLong), 5, 0)
   159  	appendBucket(h, 0, 10000)
   160  	feedbacks = feedbacks[:0]
   161  	feedbacks = append(feedbacks, newFeedback(0, 4000, 4000))
   162  	feedbacks = append(feedbacks, newFeedback(4001, 9999, 1000))
   163  	q = NewQueryFeedback(0, h, 0, false)
   164  	q.Feedback = feedbacks
   165  	buckets, isNewBuckets, totalCount = splitBuckets(q.Hist, q)
   166  	c.Assert(buildNewHistogram(q.Hist, buckets).ToString(0), Equals,
   167  		"column:0 ndv:0 totDefCausSize:0\n"+
   168  			"num: 5001 lower_bound: 0 upper_bound: 10000 repeats: 0")
   169  	c.Assert(isNewBuckets, DeepEquals, []bool{false})
   170  	c.Assert(totalCount, Equals, int64(5001))
   171  }
   172  
   173  func (s *testFeedbackSuite) TestMergeBuckets(c *C) {
   174  	originBucketCount := defaultBucketCount
   175  	defer func() { defaultBucketCount = originBucketCount }()
   176  	tests := []struct {
   177  		points       []int64
   178  		counts       []int64
   179  		isNewBuckets []bool
   180  		bucketCount  int
   181  		result       string
   182  	}{
   183  		{
   184  			points:       []int64{1, 2},
   185  			counts:       []int64{1},
   186  			isNewBuckets: []bool{false},
   187  			bucketCount:  1,
   188  			result:       "column:0 ndv:0 totDefCausSize:0\nnum: 1 lower_bound: 1 upper_bound: 2 repeats: 0",
   189  		},
   190  		{
   191  			points:       []int64{1, 2, 2, 3, 3, 4},
   192  			counts:       []int64{100000, 1, 1},
   193  			isNewBuckets: []bool{false, false, false},
   194  			bucketCount:  2,
   195  			result: "column:0 ndv:0 totDefCausSize:0\n" +
   196  				"num: 100000 lower_bound: 1 upper_bound: 2 repeats: 0\n" +
   197  				"num: 2 lower_bound: 2 upper_bound: 4 repeats: 0",
   198  		},
   199  		// test do not Merge if the result bucket count is too large
   200  		{
   201  			points:       []int64{1, 2, 2, 3, 3, 4, 4, 5},
   202  			counts:       []int64{1, 1, 100000, 100000},
   203  			isNewBuckets: []bool{false, false, false, false},
   204  			bucketCount:  3,
   205  			result: "column:0 ndv:0 totDefCausSize:0\n" +
   206  				"num: 2 lower_bound: 1 upper_bound: 3 repeats: 0\n" +
   207  				"num: 100000 lower_bound: 3 upper_bound: 4 repeats: 0\n" +
   208  				"num: 100000 lower_bound: 4 upper_bound: 5 repeats: 0",
   209  		},
   210  	}
   211  	for _, t := range tests {
   212  		bkts := make([]bucket, 0, len(t.counts))
   213  		totalCount := int64(0)
   214  		for i := 0; i < len(t.counts); i++ {
   215  			lower, upper := types.NewIntCauset(t.points[2*i]), types.NewIntCauset(t.points[2*i+1])
   216  			bkts = append(bkts, bucket{&lower, &upper, t.counts[i], 0})
   217  			totalCount += t.counts[i]
   218  		}
   219  		defaultBucketCount = t.bucketCount
   220  		bkts = mergeBuckets(bkts, t.isNewBuckets, float64(totalCount))
   221  		result := buildNewHistogram(&Histogram{Tp: types.NewFieldType(allegrosql.TypeLong)}, bkts).ToString(0)
   222  		c.Assert(result, Equals, t.result)
   223  	}
   224  }
   225  
   226  func encodeInt(v int64) *types.Causet {
   227  	val := codec.EncodeInt(nil, v)
   228  	d := types.NewBytesCauset(val)
   229  	return &d
   230  }
   231  
   232  func (s *testFeedbackSuite) TestFeedbackEncoding(c *C) {
   233  	hist := NewHistogram(0, 0, 0, 0, types.NewFieldType(allegrosql.TypeLong), 0, 0)
   234  	q := &QueryFeedback{Hist: hist, Tp: PkType}
   235  	q.Feedback = append(q.Feedback, Feedback{encodeInt(0), encodeInt(3), 1, 0})
   236  	q.Feedback = append(q.Feedback, Feedback{encodeInt(0), encodeInt(5), 1, 0})
   237  	val, err := EncodeFeedback(q)
   238  	c.Assert(err, IsNil)
   239  	rq := &QueryFeedback{}
   240  	c.Assert(DecodeFeedback(val, rq, nil, hist.Tp), IsNil)
   241  	for _, fb := range rq.Feedback {
   242  		fb.Lower.SetBytes(codec.EncodeInt(nil, fb.Lower.GetInt64()))
   243  		fb.Upper.SetBytes(codec.EncodeInt(nil, fb.Upper.GetInt64()))
   244  	}
   245  	c.Assert(q.Equal(rq), IsTrue)
   246  
   247  	hist.Tp = types.NewFieldType(allegrosql.TypeBlob)
   248  	q = &QueryFeedback{Hist: hist}
   249  	q.Feedback = append(q.Feedback, Feedback{encodeInt(0), encodeInt(3), 1, 0})
   250  	q.Feedback = append(q.Feedback, Feedback{encodeInt(0), encodeInt(1), 1, 0})
   251  	val, err = EncodeFeedback(q)
   252  	c.Assert(err, IsNil)
   253  	rq = &QueryFeedback{}
   254  	cms := NewCMSketch(4, 4)
   255  	c.Assert(DecodeFeedback(val, rq, cms, hist.Tp), IsNil)
   256  	c.Assert(cms.QueryBytes(codec.EncodeInt(nil, 0)), Equals, uint64(1))
   257  	q.Feedback = q.Feedback[:1]
   258  	c.Assert(q.Equal(rq), IsTrue)
   259  }
   260  
   261  // Equal tests if two query feedback equal, it is only used in test.
   262  func (q *QueryFeedback) Equal(rq *QueryFeedback) bool {
   263  	if len(q.Feedback) != len(rq.Feedback) {
   264  		return false
   265  	}
   266  	for i, fb := range q.Feedback {
   267  		rfb := rq.Feedback[i]
   268  		if fb.Count != rfb.Count {
   269  			return false
   270  		}
   271  		if fb.Lower.HoTT() == types.HoTTInt64 {
   272  			if fb.Lower.GetInt64() != rfb.Lower.GetInt64() {
   273  				return false
   274  			}
   275  			if fb.Upper.GetInt64() != rfb.Upper.GetInt64() {
   276  				return false
   277  			}
   278  		} else {
   279  			if !bytes.Equal(fb.Lower.GetBytes(), rfb.Lower.GetBytes()) {
   280  				return false
   281  			}
   282  			if !bytes.Equal(fb.Upper.GetBytes(), rfb.Upper.GetBytes()) {
   283  				return false
   284  			}
   285  		}
   286  	}
   287  	return true
   288  }