github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/sqle/statspro/update_test.go (about) 1 // Copyright 2023 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package statspro 16 17 import ( 18 "container/heap" 19 "context" 20 "fmt" 21 "testing" 22 23 "github.com/dolthub/go-mysql-server/sql" 24 "github.com/dolthub/go-mysql-server/sql/stats" 25 "github.com/stretchr/testify/assert" 26 "github.com/stretchr/testify/require" 27 28 "github.com/dolthub/dolt/go/store/pool" 29 "github.com/dolthub/dolt/go/store/prolly/tree" 30 "github.com/dolthub/dolt/go/store/val" 31 ) 32 33 func TestMcvHeap(t *testing.T) { 34 h := new(mcvHeap) 35 for i := 0; i < 10; i++ { 36 heap.Push(h, mcv{val.Tuple{byte(i)}, i}) 37 if i > 2 { 38 heap.Pop(h) 39 } 40 } 41 require.Equal(t, 3, h.Len()) 42 require.Equal(t, 3, len(h.Counts())) 43 for _, cnt := range h.Counts() { 44 switch int(cnt) { 45 case 7, 8, 9: 46 default: 47 t.Errorf("unexpected value in mcvHeap: %d", cnt) 48 } 49 } 50 cmp := []int{7, 8, 9} 51 var res []int 52 for i := 0; h.Len() > 0; i++ { 53 next := heap.Pop(h) 54 res = append(res, next.(mcv).cnt) 55 } 56 require.Equal(t, cmp, res) 57 } 58 59 func TestBucketBuilder(t *testing.T) { 60 tests := []struct { 61 name string 62 keys []sql.Row 63 keyDesc val.TupleDesc 64 bucket DoltBucket 65 }{ 66 { 67 name: "ints", 68 keys: []sql.Row{{1}, {1}, {1}, {2}, {2}, {2}, {2}, {3}, {3}, {3}, {4}, {4}, {4}, {5}, {5}}, 69 keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: false}), 70 bucket: DoltBucket{Bucket: &stats.Bucket{ 71 RowCnt: 15, 72 DistinctCnt: 5, 73 McvVals: []sql.Row{{int64(4)}, {int64(2)}, {int64(3)}}, 74 McvsCnt: []uint64{3, 4, 3}, 75 BoundVal: sql.Row{int64(5)}, 76 BoundCnt: 2, 77 }}, 78 }, 79 { 80 // technically nulls should be at beginning 81 name: "ints with middle nulls", 82 keys: []sql.Row{{1}, {1}, {1}, {2}, {2}, {2}, {2}, {nil}, {nil}, {nil}, {3}, {4}, {4}, {4}, {5}, {5}}, 83 keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: true}), 84 bucket: DoltBucket{Bucket: &stats.Bucket{ 85 RowCnt: 16, 86 DistinctCnt: 6, 87 NullCnt: 3, 88 McvVals: []sql.Row{{int64(4)}, {int64(2)}, {nil}}, 89 McvsCnt: []uint64{3, 4, 3}, 90 BoundVal: sql.Row{int64(5)}, 91 BoundCnt: 2, 92 }}, 93 }, 94 { 95 name: "ints with beginning nulls", 96 keys: []sql.Row{{nil}, {nil}, {1}, {2}, {2}, {2}, {2}, {3}, {3}, {3}, {4}, {4}, {4}, {5}, {5}}, 97 keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: true}), 98 bucket: DoltBucket{Bucket: &stats.Bucket{ 99 RowCnt: 15, 100 DistinctCnt: 6, 101 NullCnt: 2, 102 McvVals: []sql.Row{{int64(3)}, {int64(4)}, {int64(2)}}, 103 McvsCnt: []uint64{3, 3, 4}, 104 BoundVal: sql.Row{int64(5)}, 105 BoundCnt: 2, 106 }}, 107 }, 108 { 109 name: "more ints", 110 keys: []sql.Row{{1}, {1}, {1}, {2}, {2}, {2}, {2}, {3}, {3}, {3}, {4}, {4}, {4}, {5}, {5}, {5}, {5}, {6}, {6}, {6}, {6}, {7}}, 111 keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: false}), 112 bucket: DoltBucket{Bucket: &stats.Bucket{ 113 RowCnt: 22, 114 DistinctCnt: 7, 115 BoundCnt: 1, 116 McvVals: []sql.Row{{int64(2)}, {int64(6)}, {int64(5)}}, 117 McvsCnt: []uint64{4, 4, 4}, 118 BoundVal: sql.Row{int64(7)}, 119 }}, 120 }, 121 { 122 name: "2-ints", 123 keys: []sql.Row{{1, 1}, {1, 1}, {1, 2}, {2, 1}, {2, 2}, {2, 3}, {2, 3}, {3, 1}, {3, 2}, {3, 3}, {4, 1}, {4, 1}, {4, 1}, {5, 1}, {5, 2}}, 124 keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: false}, val.Type{Enc: val.Int64Enc, Nullable: false}), 125 bucket: DoltBucket{Bucket: &stats.Bucket{ 126 RowCnt: 15, 127 DistinctCnt: 11, 128 McvVals: []sql.Row{{int64(1), int64(1)}, {int64(4), int64(1)}, {int64(2), int64(3)}}, 129 McvsCnt: []uint64{2, 3, 2}, 130 BoundVal: sql.Row{int64(5), int64(2)}, 131 BoundCnt: 1, 132 }}, 133 }, 134 { 135 name: "2-ints with nulls", 136 keys: []sql.Row{{nil, 1}, {1, nil}, {1, 2}, {2, nil}, {2, 2}}, 137 keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: true}, val.Type{Enc: val.Int64Enc, Nullable: true}), 138 bucket: DoltBucket{Bucket: &stats.Bucket{ 139 RowCnt: 5, 140 DistinctCnt: 5, 141 NullCnt: 3, 142 McvVals: []sql.Row{{int64(2), int64(2)}, {int64(1), nil}, {int64(1), int64(2)}}, 143 McvsCnt: []uint64{1, 1, 1}, 144 BoundVal: sql.Row{int64(2), int64(2)}, 145 BoundCnt: 1}, 146 }, 147 }, 148 { 149 name: "varchars", 150 keys: []sql.Row{{"a"}, {"b"}, {"c"}, {"d"}, {"e"}, {"e"}, {"f"}, {"g"}, {"g"}, {"g"}, {"h"}, {"h"}, {"h"}, {"i"}, {"i"}}, 151 keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.StringEnc, Nullable: false}), 152 bucket: DoltBucket{Bucket: &stats.Bucket{ 153 RowCnt: 15, 154 DistinctCnt: 9, 155 McvVals: []sql.Row{{"i"}, {"h"}, {"g"}}, 156 McvsCnt: []uint64{2, 3, 3}, 157 BoundVal: sql.Row{"i"}, 158 BoundCnt: 2, 159 }}, 160 }, 161 { 162 name: "varchar-ints", 163 keys: []sql.Row{{"a", 1}, {"b", 1}, {"c", 1}, {"d", 1}, {"e", 1}, {"e", 2}, {"f", 1}, {"g", 1}, {"g", 2}, {"g", 2}, {"h", 1}, {"h", 1}, {"h", 2}, {"i", 1}, {"i", 1}}, 164 keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.StringEnc, Nullable: false}, val.Type{Enc: val.Int64Enc, Nullable: false}), 165 bucket: DoltBucket{Bucket: &stats.Bucket{ 166 RowCnt: 15, 167 DistinctCnt: 12, 168 McvVals: []sql.Row{{"i", int64(1)}, {"g", int64(2)}, {"h", int64(1)}}, 169 McvsCnt: []uint64{2, 2, 2}, 170 BoundVal: sql.Row{"i", int64(1)}, 171 BoundCnt: 2, 172 }}, 173 }, 174 } 175 176 ctx := context.Background() 177 pool := pool.NewBuffPool() 178 for _, tt := range tests { 179 t.Run(fmt.Sprintf("build bucket: %s", tt.name), func(t *testing.T) { 180 b := newBucketBuilder(sql.StatQualifier{}, tt.keyDesc.Count(), tt.keyDesc) 181 kb := val.NewTupleBuilder(tt.keyDesc) 182 for _, k := range tt.keys { 183 for i, v := range k { 184 // |ns| only needed for out of band tuples 185 err := tree.PutField(ctx, nil, kb, i, v) 186 assert.NoError(t, err) 187 } 188 b.add(kb.Build(pool)) 189 } 190 // |ns| only needed for out of band tuples 191 bucket, err := b.finalize(ctx, nil) 192 require.NoError(t, err) 193 194 require.Equal(t, int(tt.bucket.RowCount()), int(bucket.RowCount())) 195 require.Equal(t, int(tt.bucket.NullCount()), int(bucket.NullCount())) 196 require.Equal(t, int(tt.bucket.DistinctCount()), int(bucket.DistinctCount())) 197 require.Equal(t, int(tt.bucket.BoundCount()), int(bucket.BoundCount())) 198 require.Equal(t, tt.bucket.UpperBound(), bucket.UpperBound()) 199 require.Equal(t, tt.bucket.McvsCnt, bucket.McvsCnt) 200 require.Equal(t, tt.bucket.Mcvs(), bucket.Mcvs()) 201 }) 202 } 203 }