github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/colexec/sample/sample_test.go (about) 1 // Copyright 2021 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package sample 16 17 import ( 18 "github.com/matrixorigin/matrixone/pkg/container/batch" 19 "github.com/matrixorigin/matrixone/pkg/container/types" 20 "github.com/matrixorigin/matrixone/pkg/container/vector" 21 "github.com/matrixorigin/matrixone/pkg/testutil" 22 "github.com/matrixorigin/matrixone/pkg/vm/process" 23 "github.com/stretchr/testify/require" 24 "testing" 25 ) 26 27 const nullFlag = int64(-65535) 28 29 func TestSamplePool(t *testing.T) { 30 proc := testutil.NewProcess() 31 32 // data source : 33 // (1, 1), (2, 2), (3, 3), (3, 4), (4, 4), 34 // (5, 5), (6, 6), (6, 6), (7, 7), (8, null). 35 rows1 := [][]int64{ 36 {1, 1}, 37 {2, 2}, 38 {3, 3}, 39 {3, 4}, 40 {4, 4}, 41 } 42 row2 := [][]int64{ 43 {5, 5}, 44 {6, 6}, 45 {6, 6}, 46 {7, 7}, 47 {8, nullFlag}, 48 } 49 b1, e1 := genSampleBatch(proc, rows1) 50 require.NoError(t, e1) 51 b2, e2 := genSampleBatch(proc, row2) 52 require.NoError(t, e2) 53 54 { 55 // sample 5 rows by second column. 56 pool1 := newSamplePoolByRows(proc, 5, 1, true) 57 err := pool1.sampleFromColumn(1, b1.Vecs[1], b1) 58 require.NoError(t, err) 59 err = pool1.sampleFromColumn(1, b2.Vecs[1], b2) 60 require.NoError(t, err) 61 62 // cannot get any result before end. 63 tbat, err := pool1.Result(false) 64 require.NoError(t, err) 65 require.Equal(t, 0, tbat.RowCount()) 66 67 // check the result. 68 // due to reorder, the result will be [sample column, normal column, rowsCount column]. 69 out, err := pool1.Result(true) 70 require.NoError(t, err) 71 require.Equal(t, 3, len(out.Vecs)) 72 require.Equal(t, 5, out.Vecs[0].Length()) 73 require.Equal(t, 5, out.Vecs[1].Length()) 74 // invalid scan row count was 9. the 10th row with null value at the sample column will be ignored. 75 require.Equal(t, int64(9), vector.GetFixedAt[int64](out.Vecs[2], 0)) 76 77 out.Clean(proc.Mp()) 78 pool1.Free() 79 } 80 81 { 82 // sample 5 rows by 2 columns. 83 pool2 := newSamplePoolByRows(proc, 5, 2, false) 84 err := pool2.sampleFromColumns(1, b1.Vecs, b1) 85 require.NoError(t, err) 86 err = pool2.sampleFromColumns(1, b2.Vecs, b2) 87 require.NoError(t, err) 88 89 tbat, err := pool2.Result(false) 90 require.NoError(t, err) 91 require.Equal(t, 0, tbat.RowCount()) 92 93 out, err := pool2.Result(true) 94 require.NoError(t, err) 95 // due to we set outputRowCount to false, the result will be [sample column, normal column]. 96 require.Equal(t, 2, len(out.Vecs)) 97 require.Equal(t, 5, out.Vecs[0].Length()) 98 require.Equal(t, 5, out.Vecs[1].Length()) 99 100 out.Clean(proc.Mp()) 101 pool2.Free() 102 } 103 104 { 105 // sample 100 % rows by the second column. 106 pool3 := newSamplePoolByPercent(proc, 100.0, 1) 107 err := pool3.sampleFromColumn(1, b1.Vecs[1], b1) 108 require.NoError(t, err) 109 110 // can take out the result before an end. 111 tbat, err := pool3.Result(false) 112 require.NoError(t, err) 113 require.Equal(t, 5, tbat.RowCount()) 114 115 err = pool3.sampleFromColumn(1, b2.Vecs[1], b2) 116 require.NoError(t, err) 117 118 out, err := pool3.Result(true) 119 require.NoError(t, err) 120 require.Equal(t, 4, out.RowCount()) 121 122 tbat.Clean(proc.Mp()) 123 out.Clean(proc.Mp()) 124 pool3.Free() 125 } 126 127 proc.FreeVectors() 128 b1.Clean(proc.Mp()) 129 b2.Clean(proc.Mp()) 130 require.Equal(t, int64(0), proc.Mp().CurrNB()) 131 } 132 133 func genSampleBatch(proc *process.Process, rows [][]int64) (*batch.Batch, error) { 134 b := batch.NewWithSize(len(rows[0])) 135 136 var err error 137 for i := range b.Vecs { 138 b.Vecs[i] = proc.GetVector(types.T_int64.ToType()) 139 140 for _, rowValue := range rows { 141 err = vector.AppendFixed[int64](b.Vecs[i], rowValue[i], rowValue[i] == nullFlag, proc.Mp()) 142 if err != nil { 143 return nil, err 144 } 145 } 146 } 147 b.SetRowCount(len(rows)) 148 return b, nil 149 } 150 151 func TestSamplePoolOthers(t *testing.T) { 152 // merge sample and sample by percent cannot be tested full. 153 s1 := newSamplePoolByRows(nil, 1, 1, false) 154 s2 := newSamplePoolByPercent(nil, 1.0, 1) 155 s3 := newSamplePoolByRowsForMerge(nil, 1, 1, false) 156 157 s1.setPerfFields(false) 158 s2.setPerfFields(false) 159 s3.setPerfFields(false) 160 require.Equal(t, true, s1.canCheckFull) 161 require.Equal(t, false, s2.canCheckFull) 162 require.Equal(t, false, s3.canCheckFull) 163 164 // once sample for each group, full check is not supported. 165 s1.setPerfFields(true) 166 s2.setPerfFields(true) 167 s3.setPerfFields(true) 168 require.Equal(t, false, s1.canCheckFull) 169 require.Equal(t, false, s2.canCheckFull) 170 require.Equal(t, false, s3.canCheckFull) 171 }