github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/sqle/statspro/update_test.go (about)

     1  // Copyright 2023 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package statspro
    16  
    17  import (
    18  	"container/heap"
    19  	"context"
    20  	"fmt"
    21  	"testing"
    22  
    23  	"github.com/dolthub/go-mysql-server/sql"
    24  	"github.com/dolthub/go-mysql-server/sql/stats"
    25  	"github.com/stretchr/testify/assert"
    26  	"github.com/stretchr/testify/require"
    27  
    28  	"github.com/dolthub/dolt/go/store/pool"
    29  	"github.com/dolthub/dolt/go/store/prolly/tree"
    30  	"github.com/dolthub/dolt/go/store/val"
    31  )
    32  
    33  func TestMcvHeap(t *testing.T) {
    34  	h := new(mcvHeap)
    35  	for i := 0; i < 10; i++ {
    36  		heap.Push(h, mcv{val.Tuple{byte(i)}, i})
    37  		if i > 2 {
    38  			heap.Pop(h)
    39  		}
    40  	}
    41  	require.Equal(t, 3, h.Len())
    42  	require.Equal(t, 3, len(h.Counts()))
    43  	for _, cnt := range h.Counts() {
    44  		switch int(cnt) {
    45  		case 7, 8, 9:
    46  		default:
    47  			t.Errorf("unexpected value in mcvHeap: %d", cnt)
    48  		}
    49  	}
    50  	cmp := []int{7, 8, 9}
    51  	var res []int
    52  	for i := 0; h.Len() > 0; i++ {
    53  		next := heap.Pop(h)
    54  		res = append(res, next.(mcv).cnt)
    55  	}
    56  	require.Equal(t, cmp, res)
    57  }
    58  
    59  func TestBucketBuilder(t *testing.T) {
    60  	tests := []struct {
    61  		name    string
    62  		keys    []sql.Row
    63  		keyDesc val.TupleDesc
    64  		bucket  DoltBucket
    65  	}{
    66  		{
    67  			name:    "ints",
    68  			keys:    []sql.Row{{1}, {1}, {1}, {2}, {2}, {2}, {2}, {3}, {3}, {3}, {4}, {4}, {4}, {5}, {5}},
    69  			keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: false}),
    70  			bucket: DoltBucket{Bucket: &stats.Bucket{
    71  				RowCnt:      15,
    72  				DistinctCnt: 5,
    73  				McvVals:     []sql.Row{{int64(4)}, {int64(2)}, {int64(3)}},
    74  				McvsCnt:     []uint64{3, 4, 3},
    75  				BoundVal:    sql.Row{int64(5)},
    76  				BoundCnt:    2,
    77  			}},
    78  		},
    79  		{
    80  			// technically nulls should be at beginning
    81  			name:    "ints with middle nulls",
    82  			keys:    []sql.Row{{1}, {1}, {1}, {2}, {2}, {2}, {2}, {nil}, {nil}, {nil}, {3}, {4}, {4}, {4}, {5}, {5}},
    83  			keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: true}),
    84  			bucket: DoltBucket{Bucket: &stats.Bucket{
    85  				RowCnt:      16,
    86  				DistinctCnt: 6,
    87  				NullCnt:     3,
    88  				McvVals:     []sql.Row{{int64(4)}, {int64(2)}, {nil}},
    89  				McvsCnt:     []uint64{3, 4, 3},
    90  				BoundVal:    sql.Row{int64(5)},
    91  				BoundCnt:    2,
    92  			}},
    93  		},
    94  		{
    95  			name:    "ints with beginning nulls",
    96  			keys:    []sql.Row{{nil}, {nil}, {1}, {2}, {2}, {2}, {2}, {3}, {3}, {3}, {4}, {4}, {4}, {5}, {5}},
    97  			keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: true}),
    98  			bucket: DoltBucket{Bucket: &stats.Bucket{
    99  				RowCnt:      15,
   100  				DistinctCnt: 6,
   101  				NullCnt:     2,
   102  				McvVals:     []sql.Row{{int64(3)}, {int64(4)}, {int64(2)}},
   103  				McvsCnt:     []uint64{3, 3, 4},
   104  				BoundVal:    sql.Row{int64(5)},
   105  				BoundCnt:    2,
   106  			}},
   107  		},
   108  		{
   109  			name:    "more ints",
   110  			keys:    []sql.Row{{1}, {1}, {1}, {2}, {2}, {2}, {2}, {3}, {3}, {3}, {4}, {4}, {4}, {5}, {5}, {5}, {5}, {6}, {6}, {6}, {6}, {7}},
   111  			keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: false}),
   112  			bucket: DoltBucket{Bucket: &stats.Bucket{
   113  				RowCnt:      22,
   114  				DistinctCnt: 7,
   115  				BoundCnt:    1,
   116  				McvVals:     []sql.Row{{int64(2)}, {int64(6)}, {int64(5)}},
   117  				McvsCnt:     []uint64{4, 4, 4},
   118  				BoundVal:    sql.Row{int64(7)},
   119  			}},
   120  		},
   121  		{
   122  			name:    "2-ints",
   123  			keys:    []sql.Row{{1, 1}, {1, 1}, {1, 2}, {2, 1}, {2, 2}, {2, 3}, {2, 3}, {3, 1}, {3, 2}, {3, 3}, {4, 1}, {4, 1}, {4, 1}, {5, 1}, {5, 2}},
   124  			keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: false}, val.Type{Enc: val.Int64Enc, Nullable: false}),
   125  			bucket: DoltBucket{Bucket: &stats.Bucket{
   126  				RowCnt:      15,
   127  				DistinctCnt: 11,
   128  				McvVals:     []sql.Row{{int64(1), int64(1)}, {int64(4), int64(1)}, {int64(2), int64(3)}},
   129  				McvsCnt:     []uint64{2, 3, 2},
   130  				BoundVal:    sql.Row{int64(5), int64(2)},
   131  				BoundCnt:    1,
   132  			}},
   133  		},
   134  		{
   135  			name:    "2-ints with nulls",
   136  			keys:    []sql.Row{{nil, 1}, {1, nil}, {1, 2}, {2, nil}, {2, 2}},
   137  			keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: true}, val.Type{Enc: val.Int64Enc, Nullable: true}),
   138  			bucket: DoltBucket{Bucket: &stats.Bucket{
   139  				RowCnt:      5,
   140  				DistinctCnt: 5,
   141  				NullCnt:     3,
   142  				McvVals:     []sql.Row{{int64(2), int64(2)}, {int64(1), nil}, {int64(1), int64(2)}},
   143  				McvsCnt:     []uint64{1, 1, 1},
   144  				BoundVal:    sql.Row{int64(2), int64(2)},
   145  				BoundCnt:    1},
   146  			},
   147  		},
   148  		{
   149  			name:    "varchars",
   150  			keys:    []sql.Row{{"a"}, {"b"}, {"c"}, {"d"}, {"e"}, {"e"}, {"f"}, {"g"}, {"g"}, {"g"}, {"h"}, {"h"}, {"h"}, {"i"}, {"i"}},
   151  			keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.StringEnc, Nullable: false}),
   152  			bucket: DoltBucket{Bucket: &stats.Bucket{
   153  				RowCnt:      15,
   154  				DistinctCnt: 9,
   155  				McvVals:     []sql.Row{{"i"}, {"h"}, {"g"}},
   156  				McvsCnt:     []uint64{2, 3, 3},
   157  				BoundVal:    sql.Row{"i"},
   158  				BoundCnt:    2,
   159  			}},
   160  		},
   161  		{
   162  			name:    "varchar-ints",
   163  			keys:    []sql.Row{{"a", 1}, {"b", 1}, {"c", 1}, {"d", 1}, {"e", 1}, {"e", 2}, {"f", 1}, {"g", 1}, {"g", 2}, {"g", 2}, {"h", 1}, {"h", 1}, {"h", 2}, {"i", 1}, {"i", 1}},
   164  			keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.StringEnc, Nullable: false}, val.Type{Enc: val.Int64Enc, Nullable: false}),
   165  			bucket: DoltBucket{Bucket: &stats.Bucket{
   166  				RowCnt:      15,
   167  				DistinctCnt: 12,
   168  				McvVals:     []sql.Row{{"i", int64(1)}, {"g", int64(2)}, {"h", int64(1)}},
   169  				McvsCnt:     []uint64{2, 2, 2},
   170  				BoundVal:    sql.Row{"i", int64(1)},
   171  				BoundCnt:    2,
   172  			}},
   173  		},
   174  	}
   175  
   176  	ctx := context.Background()
   177  	pool := pool.NewBuffPool()
   178  	for _, tt := range tests {
   179  		t.Run(fmt.Sprintf("build bucket: %s", tt.name), func(t *testing.T) {
   180  			b := newBucketBuilder(sql.StatQualifier{}, tt.keyDesc.Count(), tt.keyDesc)
   181  			kb := val.NewTupleBuilder(tt.keyDesc)
   182  			for _, k := range tt.keys {
   183  				for i, v := range k {
   184  					// |ns| only needed for out of band tuples
   185  					err := tree.PutField(ctx, nil, kb, i, v)
   186  					assert.NoError(t, err)
   187  				}
   188  				b.add(kb.Build(pool))
   189  			}
   190  			// |ns| only needed for out of band tuples
   191  			bucket, err := b.finalize(ctx, nil)
   192  			require.NoError(t, err)
   193  
   194  			require.Equal(t, int(tt.bucket.RowCount()), int(bucket.RowCount()))
   195  			require.Equal(t, int(tt.bucket.NullCount()), int(bucket.NullCount()))
   196  			require.Equal(t, int(tt.bucket.DistinctCount()), int(bucket.DistinctCount()))
   197  			require.Equal(t, int(tt.bucket.BoundCount()), int(bucket.BoundCount()))
   198  			require.Equal(t, tt.bucket.UpperBound(), bucket.UpperBound())
   199  			require.Equal(t, tt.bucket.McvsCnt, bucket.McvsCnt)
   200  			require.Equal(t, tt.bucket.Mcvs(), bucket.Mcvs())
   201  		})
   202  	}
   203  }