github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/causetstore/milevadb-server/statistics/feedback_test.go (about) 1 // Copyright 2020 WHTCORPS INC, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package statistics 15 16 import ( 17 "bytes" 18 19 . "github.com/whtcorpsinc/check" 20 "github.com/whtcorpsinc/BerolinaSQL/allegrosql" 21 "github.com/whtcorpsinc/milevadb/types" 22 "github.com/whtcorpsinc/milevadb/soliton/codec" 23 ) 24 25 var _ = Suite(&testFeedbackSuite{}) 26 27 type testFeedbackSuite struct { 28 } 29 30 func newFeedback(lower, upper, count int64) Feedback { 31 low, upp := types.NewIntCauset(lower), types.NewIntCauset(upper) 32 return Feedback{&low, &upp, count, 0} 33 } 34 35 func genFeedbacks(lower, upper int64) []Feedback { 36 var feedbacks []Feedback 37 for i := lower; i < upper; i++ { 38 feedbacks = append(feedbacks, newFeedback(i, upper, upper-i+1)) 39 } 40 return feedbacks 41 } 42 43 func appendBucket(h *Histogram, l, r int64) { 44 lower, upper := types.NewIntCauset(l), types.NewIntCauset(r) 45 h.AppendBucket(&lower, &upper, 0, 0) 46 } 47 48 func genHistogram() *Histogram { 49 h := NewHistogram(0, 0, 0, 0, types.NewFieldType(allegrosql.TypeLong), 5, 0) 50 appendBucket(h, 1, 1) 51 appendBucket(h, 2, 3) 52 appendBucket(h, 5, 7) 53 appendBucket(h, 10, 20) 54 appendBucket(h, 30, 50) 55 return h 56 } 57 58 func (s *testFeedbackSuite) TestUFIDelateHistogram(c *C) { 59 feedbacks := []Feedback{ 60 newFeedback(0, 1, 10000), 61 newFeedback(1, 2, 1), 62 newFeedback(2, 3, 3), 63 newFeedback(4, 5, 2), 64 newFeedback(5, 7, 4), 65 } 66 feedbacks = append(feedbacks, genFeedbacks(8, 20)...) 67 feedbacks = append(feedbacks, genFeedbacks(21, 60)...) 68 69 q := NewQueryFeedback(0, genHistogram(), 0, false) 70 q.Feedback = feedbacks 71 originBucketCount := defaultBucketCount 72 defaultBucketCount = 7 73 defer func() { defaultBucketCount = originBucketCount }() 74 c.Assert(UFIDelateHistogram(q.Hist, q).ToString(0), Equals, 75 "column:0 ndv:10053 totDefCausSize:0\n"+ 76 "num: 10001 lower_bound: 0 upper_bound: 2 repeats: 0\n"+ 77 "num: 7 lower_bound: 2 upper_bound: 5 repeats: 0\n"+ 78 "num: 4 lower_bound: 5 upper_bound: 7 repeats: 0\n"+ 79 "num: 11 lower_bound: 10 upper_bound: 20 repeats: 0\n"+ 80 "num: 19 lower_bound: 30 upper_bound: 49 repeats: 0\n"+ 81 "num: 11 lower_bound: 50 upper_bound: 60 repeats: 0") 82 } 83 84 func (s *testFeedbackSuite) TestSplitBuckets(c *C) { 85 // test bucket split 86 feedbacks := []Feedback{newFeedback(0, 1, 1)} 87 for i := 0; i < 100; i++ { 88 feedbacks = append(feedbacks, newFeedback(10, 15, 5)) 89 } 90 q := NewQueryFeedback(0, genHistogram(), 0, false) 91 q.Feedback = feedbacks 92 buckets, isNewBuckets, totalCount := splitBuckets(q.Hist, q) 93 c.Assert(buildNewHistogram(q.Hist, buckets).ToString(0), Equals, 94 "column:0 ndv:0 totDefCausSize:0\n"+ 95 "num: 1 lower_bound: 0 upper_bound: 1 repeats: 0\n"+ 96 "num: 0 lower_bound: 2 upper_bound: 3 repeats: 0\n"+ 97 "num: 0 lower_bound: 5 upper_bound: 7 repeats: 0\n"+ 98 "num: 5 lower_bound: 10 upper_bound: 15 repeats: 0\n"+ 99 "num: 0 lower_bound: 16 upper_bound: 20 repeats: 0\n"+ 100 "num: 0 lower_bound: 30 upper_bound: 50 repeats: 0") 101 c.Assert(isNewBuckets, DeepEquals, []bool{false, false, false, true, true, false}) 102 c.Assert(totalCount, Equals, int64(6)) 103 104 // test do not split if the bucket count is too small 105 feedbacks = []Feedback{newFeedback(0, 1, 100000)} 106 for i := 0; i < 100; i++ { 107 feedbacks = append(feedbacks, newFeedback(10, 15, 1)) 108 } 109 q = NewQueryFeedback(0, genHistogram(), 0, false) 110 q.Feedback = feedbacks 111 buckets, isNewBuckets, totalCount = splitBuckets(q.Hist, q) 112 c.Assert(buildNewHistogram(q.Hist, buckets).ToString(0), Equals, 113 "column:0 ndv:0 totDefCausSize:0\n"+ 114 "num: 100000 lower_bound: 0 upper_bound: 1 repeats: 0\n"+ 115 "num: 0 lower_bound: 2 upper_bound: 3 repeats: 0\n"+ 116 "num: 0 lower_bound: 5 upper_bound: 7 repeats: 0\n"+ 117 "num: 1 lower_bound: 10 upper_bound: 15 repeats: 0\n"+ 118 "num: 0 lower_bound: 16 upper_bound: 20 repeats: 0\n"+ 119 "num: 0 lower_bound: 30 upper_bound: 50 repeats: 0") 120 c.Assert(isNewBuckets, DeepEquals, []bool{false, false, false, true, true, false}) 121 c.Assert(totalCount, Equals, int64(100001)) 122 123 // test do not split if the result bucket count is too small 124 h := NewHistogram(0, 0, 0, 0, types.NewFieldType(allegrosql.TypeLong), 5, 0) 125 appendBucket(h, 0, 1000000) 126 h.Buckets[0].Count = 1000000 127 feedbacks = feedbacks[:0] 128 for i := 0; i < 100; i++ { 129 feedbacks = append(feedbacks, newFeedback(0, 10, 1)) 130 } 131 q = NewQueryFeedback(0, h, 0, false) 132 q.Feedback = feedbacks 133 buckets, isNewBuckets, totalCount = splitBuckets(q.Hist, q) 134 c.Assert(buildNewHistogram(q.Hist, buckets).ToString(0), Equals, 135 "column:0 ndv:0 totDefCausSize:0\n"+ 136 "num: 1000000 lower_bound: 0 upper_bound: 1000000 repeats: 0") 137 c.Assert(isNewBuckets, DeepEquals, []bool{false}) 138 c.Assert(totalCount, Equals, int64(1000000)) 139 140 // test split even if the feedback range is too small 141 h = NewHistogram(0, 0, 0, 0, types.NewFieldType(allegrosql.TypeLong), 5, 0) 142 appendBucket(h, 0, 1000000) 143 feedbacks = feedbacks[:0] 144 for i := 0; i < 100; i++ { 145 feedbacks = append(feedbacks, newFeedback(0, 10, 1)) 146 } 147 q = NewQueryFeedback(0, h, 0, false) 148 q.Feedback = feedbacks 149 buckets, isNewBuckets, totalCount = splitBuckets(q.Hist, q) 150 c.Assert(buildNewHistogram(q.Hist, buckets).ToString(0), Equals, 151 "column:0 ndv:0 totDefCausSize:0\n"+ 152 "num: 1 lower_bound: 0 upper_bound: 10 repeats: 0\n"+ 153 "num: 0 lower_bound: 11 upper_bound: 1000000 repeats: 0") 154 c.Assert(isNewBuckets, DeepEquals, []bool{true, true}) 155 c.Assert(totalCount, Equals, int64(1)) 156 157 // test merge the non-overlapped feedbacks. 158 h = NewHistogram(0, 0, 0, 0, types.NewFieldType(allegrosql.TypeLong), 5, 0) 159 appendBucket(h, 0, 10000) 160 feedbacks = feedbacks[:0] 161 feedbacks = append(feedbacks, newFeedback(0, 4000, 4000)) 162 feedbacks = append(feedbacks, newFeedback(4001, 9999, 1000)) 163 q = NewQueryFeedback(0, h, 0, false) 164 q.Feedback = feedbacks 165 buckets, isNewBuckets, totalCount = splitBuckets(q.Hist, q) 166 c.Assert(buildNewHistogram(q.Hist, buckets).ToString(0), Equals, 167 "column:0 ndv:0 totDefCausSize:0\n"+ 168 "num: 5001 lower_bound: 0 upper_bound: 10000 repeats: 0") 169 c.Assert(isNewBuckets, DeepEquals, []bool{false}) 170 c.Assert(totalCount, Equals, int64(5001)) 171 } 172 173 func (s *testFeedbackSuite) TestMergeBuckets(c *C) { 174 originBucketCount := defaultBucketCount 175 defer func() { defaultBucketCount = originBucketCount }() 176 tests := []struct { 177 points []int64 178 counts []int64 179 isNewBuckets []bool 180 bucketCount int 181 result string 182 }{ 183 { 184 points: []int64{1, 2}, 185 counts: []int64{1}, 186 isNewBuckets: []bool{false}, 187 bucketCount: 1, 188 result: "column:0 ndv:0 totDefCausSize:0\nnum: 1 lower_bound: 1 upper_bound: 2 repeats: 0", 189 }, 190 { 191 points: []int64{1, 2, 2, 3, 3, 4}, 192 counts: []int64{100000, 1, 1}, 193 isNewBuckets: []bool{false, false, false}, 194 bucketCount: 2, 195 result: "column:0 ndv:0 totDefCausSize:0\n" + 196 "num: 100000 lower_bound: 1 upper_bound: 2 repeats: 0\n" + 197 "num: 2 lower_bound: 2 upper_bound: 4 repeats: 0", 198 }, 199 // test do not Merge if the result bucket count is too large 200 { 201 points: []int64{1, 2, 2, 3, 3, 4, 4, 5}, 202 counts: []int64{1, 1, 100000, 100000}, 203 isNewBuckets: []bool{false, false, false, false}, 204 bucketCount: 3, 205 result: "column:0 ndv:0 totDefCausSize:0\n" + 206 "num: 2 lower_bound: 1 upper_bound: 3 repeats: 0\n" + 207 "num: 100000 lower_bound: 3 upper_bound: 4 repeats: 0\n" + 208 "num: 100000 lower_bound: 4 upper_bound: 5 repeats: 0", 209 }, 210 } 211 for _, t := range tests { 212 bkts := make([]bucket, 0, len(t.counts)) 213 totalCount := int64(0) 214 for i := 0; i < len(t.counts); i++ { 215 lower, upper := types.NewIntCauset(t.points[2*i]), types.NewIntCauset(t.points[2*i+1]) 216 bkts = append(bkts, bucket{&lower, &upper, t.counts[i], 0}) 217 totalCount += t.counts[i] 218 } 219 defaultBucketCount = t.bucketCount 220 bkts = mergeBuckets(bkts, t.isNewBuckets, float64(totalCount)) 221 result := buildNewHistogram(&Histogram{Tp: types.NewFieldType(allegrosql.TypeLong)}, bkts).ToString(0) 222 c.Assert(result, Equals, t.result) 223 } 224 } 225 226 func encodeInt(v int64) *types.Causet { 227 val := codec.EncodeInt(nil, v) 228 d := types.NewBytesCauset(val) 229 return &d 230 } 231 232 func (s *testFeedbackSuite) TestFeedbackEncoding(c *C) { 233 hist := NewHistogram(0, 0, 0, 0, types.NewFieldType(allegrosql.TypeLong), 0, 0) 234 q := &QueryFeedback{Hist: hist, Tp: PkType} 235 q.Feedback = append(q.Feedback, Feedback{encodeInt(0), encodeInt(3), 1, 0}) 236 q.Feedback = append(q.Feedback, Feedback{encodeInt(0), encodeInt(5), 1, 0}) 237 val, err := EncodeFeedback(q) 238 c.Assert(err, IsNil) 239 rq := &QueryFeedback{} 240 c.Assert(DecodeFeedback(val, rq, nil, hist.Tp), IsNil) 241 for _, fb := range rq.Feedback { 242 fb.Lower.SetBytes(codec.EncodeInt(nil, fb.Lower.GetInt64())) 243 fb.Upper.SetBytes(codec.EncodeInt(nil, fb.Upper.GetInt64())) 244 } 245 c.Assert(q.Equal(rq), IsTrue) 246 247 hist.Tp = types.NewFieldType(allegrosql.TypeBlob) 248 q = &QueryFeedback{Hist: hist} 249 q.Feedback = append(q.Feedback, Feedback{encodeInt(0), encodeInt(3), 1, 0}) 250 q.Feedback = append(q.Feedback, Feedback{encodeInt(0), encodeInt(1), 1, 0}) 251 val, err = EncodeFeedback(q) 252 c.Assert(err, IsNil) 253 rq = &QueryFeedback{} 254 cms := NewCMSketch(4, 4) 255 c.Assert(DecodeFeedback(val, rq, cms, hist.Tp), IsNil) 256 c.Assert(cms.QueryBytes(codec.EncodeInt(nil, 0)), Equals, uint64(1)) 257 q.Feedback = q.Feedback[:1] 258 c.Assert(q.Equal(rq), IsTrue) 259 } 260 261 // Equal tests if two query feedback equal, it is only used in test. 262 func (q *QueryFeedback) Equal(rq *QueryFeedback) bool { 263 if len(q.Feedback) != len(rq.Feedback) { 264 return false 265 } 266 for i, fb := range q.Feedback { 267 rfb := rq.Feedback[i] 268 if fb.Count != rfb.Count { 269 return false 270 } 271 if fb.Lower.HoTT() == types.HoTTInt64 { 272 if fb.Lower.GetInt64() != rfb.Lower.GetInt64() { 273 return false 274 } 275 if fb.Upper.GetInt64() != rfb.Upper.GetInt64() { 276 return false 277 } 278 } else { 279 if !bytes.Equal(fb.Lower.GetBytes(), rfb.Lower.GetBytes()) { 280 return false 281 } 282 if !bytes.Equal(fb.Upper.GetBytes(), rfb.Upper.GetBytes()) { 283 return false 284 } 285 } 286 } 287 return true 288 }