github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/stats/histogram_test.go (about) 1 // Copyright 2017 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package stats 12 13 import ( 14 "fmt" 15 "math" 16 "math/rand" 17 "testing" 18 19 "github.com/cockroachdb/cockroach/pkg/settings/cluster" 20 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 21 "github.com/cockroachdb/cockroach/pkg/util/encoding" 22 ) 23 24 func TestEquiDepthHistogram(t *testing.T) { 25 type expBucket struct { 26 upper int 27 numEq int64 28 numLess int64 29 distinctLess float64 30 } 31 testCases := []struct { 32 samples []int64 33 numRows int64 34 distinctCount int64 35 maxBuckets int 36 buckets []expBucket 37 }{ 38 { 39 samples: []int64{1, 2, 4, 5, 5, 9}, 40 numRows: 6, 41 distinctCount: 5, 42 maxBuckets: 3, 43 buckets: []expBucket{ 44 { 45 // Bucket contains 1. 46 upper: 1, numEq: 1, numLess: 0, distinctLess: 0, 47 }, 48 { 49 // Bucket contains 2, 4. 50 upper: 4, numEq: 1, numLess: 1, distinctLess: 0.73, 51 }, 52 { 53 // Bucket contains 5, 5, 9. 54 upper: 9, numEq: 1, numLess: 2, distinctLess: 1.27, 55 }, 56 }, 57 }, 58 { 59 samples: []int64{1, 1, 1, 1, 2, 2}, 60 numRows: 6, 61 distinctCount: 2, 62 maxBuckets: 2, 63 buckets: []expBucket{ 64 { 65 // Bucket contains 1, 1, 1, 1. 66 upper: 1, numEq: 4, numLess: 0, distinctLess: 0, 67 }, 68 { 69 // Bucket contains 2, 2. 70 upper: 2, numEq: 2, numLess: 0, distinctLess: 0, 71 }, 72 }, 73 }, 74 { 75 samples: []int64{1, 1, 1, 1, 2, 2}, 76 numRows: 6, 77 distinctCount: 2, 78 maxBuckets: 3, 79 buckets: []expBucket{ 80 { 81 // Bucket contains 1, 1, 1, 1. 82 upper: 1, numEq: 4, numLess: 0, distinctLess: 0, 83 }, 84 { 85 // Bucket contains 2, 2. 86 upper: 2, numEq: 2, numLess: 0, distinctLess: 0, 87 }, 88 }, 89 }, 90 { 91 samples: []int64{1, 1, 2, 2, 2, 2}, 92 numRows: 6, 93 distinctCount: 2, 94 maxBuckets: 2, 95 buckets: []expBucket{ 96 { 97 // Bucket contains 1, 1. 98 upper: 1, numEq: 2, numLess: 0, distinctLess: 0, 99 }, 100 { 101 // Bucket contains 2, 2, 2, 2. 102 upper: 2, numEq: 4, numLess: 0, distinctLess: 0, 103 }, 104 }, 105 }, 106 { 107 samples: []int64{1, 1, 2, 2, 2, 2}, 108 numRows: 6, 109 distinctCount: 2, 110 maxBuckets: 3, 111 buckets: []expBucket{ 112 { 113 // Bucket contains 1, 1. 114 upper: 1, numEq: 2, numLess: 0, distinctLess: 0, 115 }, 116 { 117 // Bucket contains 2, 2, 2, 2. 118 upper: 2, numEq: 4, numLess: 0, distinctLess: 0, 119 }, 120 }, 121 }, 122 { 123 samples: []int64{1, 1, 1, 1, 1, 1}, 124 numRows: 600, 125 distinctCount: 1, 126 maxBuckets: 10, 127 buckets: []expBucket{ 128 { 129 // Bucket contains everything. 130 upper: 1, numEq: 600, numLess: 0, distinctLess: 0, 131 }, 132 }, 133 }, 134 { 135 samples: []int64{1, 2, 3, 4}, 136 numRows: 4000, 137 distinctCount: 4, 138 maxBuckets: 3, 139 buckets: []expBucket{ 140 { 141 // Bucket contains 1. 142 upper: 1, numEq: 1000, numLess: 0, distinctLess: 0, 143 }, 144 { 145 // Bucket contains 2. 146 upper: 2, numEq: 1000, numLess: 0, distinctLess: 0, 147 }, 148 { 149 // Bucket contains 3, 4. 150 upper: 4, numEq: 1000, numLess: 1000, distinctLess: 1, 151 }, 152 }, 153 }, 154 { 155 samples: []int64{-9222292034315889200, -9130100296576294525, -9042492057500701159}, 156 numRows: 3000, 157 distinctCount: 300, 158 maxBuckets: 2, 159 buckets: []expBucket{ 160 { 161 // Bucket contains -9222292034315889200. 162 upper: -9222292034315889200, numEq: 1000, numLess: 0, distinctLess: 0, 163 }, 164 { 165 // Bucket contains -9130100296576294525, -9042492057500701159. 166 upper: -9042492057500701159, numEq: 1000, numLess: 1000, distinctLess: 298, 167 }, 168 }, 169 }, 170 } 171 172 evalCtx := tree.NewTestingEvalContext(cluster.MakeTestingClusterSettings()) 173 174 for i, tc := range testCases { 175 t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { 176 samples := make(tree.Datums, len(tc.samples)) 177 perm := rand.Perm(len(samples)) 178 for i := range samples { 179 // Randomly permute the samples. 180 val := tc.samples[perm[i]] 181 182 samples[i] = tree.NewDInt(tree.DInt(val)) 183 } 184 185 h, err := EquiDepthHistogram(evalCtx, samples, tc.numRows, tc.distinctCount, tc.maxBuckets) 186 if err != nil { 187 t.Fatal(err) 188 } 189 if len(h.Buckets) != len(tc.buckets) { 190 t.Fatalf("Invalid number of buckets %d, expected %d", len(h.Buckets), len(tc.buckets)) 191 } 192 for i, b := range h.Buckets { 193 _, val, err := encoding.DecodeVarintAscending(b.UpperBound) 194 if err != nil { 195 t.Fatal(err) 196 } 197 exp := tc.buckets[i] 198 if val != int64(exp.upper) { 199 t.Errorf("bucket %d: incorrect boundary %d, expected %d", i, val, exp.upper) 200 } 201 if b.NumEq != exp.numEq { 202 t.Errorf("bucket %d: incorrect EqRows %d, expected %d", i, b.NumEq, exp.numEq) 203 } 204 if b.NumRange != exp.numLess { 205 t.Errorf("bucket %d: incorrect RangeRows %d, expected %d", i, b.NumRange, exp.numLess) 206 } 207 // Round to two decimal places. 208 distinctRange := math.Round(b.DistinctRange*100.0) / 100.0 209 if distinctRange != exp.distinctLess { 210 t.Errorf("bucket %d: incorrect DistinctRows %f, expected %f", i, distinctRange, exp.distinctLess) 211 } 212 } 213 }) 214 } 215 216 t.Run("invalid-numRows", func(t *testing.T) { 217 samples := tree.Datums{tree.NewDInt(1), tree.NewDInt(2), tree.NewDInt(3)} 218 _, err := EquiDepthHistogram( 219 evalCtx, samples, 2 /* numRows */, 2 /* distinctCount */, 10, /* maxBuckets */ 220 ) 221 if err == nil { 222 t.Fatal("expected error") 223 } 224 }) 225 226 t.Run("nulls", func(t *testing.T) { 227 samples := tree.Datums{tree.NewDInt(1), tree.NewDInt(2), tree.DNull} 228 _, err := EquiDepthHistogram( 229 evalCtx, samples, 100 /* numRows */, 3 /* distinctCount */, 10, /* maxBuckets */ 230 ) 231 if err == nil { 232 t.Fatal("expected error") 233 } 234 }) 235 }