github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/colexec/sample/sample_test.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package sample
    16  
    17  import (
    18  	"github.com/matrixorigin/matrixone/pkg/container/batch"
    19  	"github.com/matrixorigin/matrixone/pkg/container/types"
    20  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    21  	"github.com/matrixorigin/matrixone/pkg/testutil"
    22  	"github.com/matrixorigin/matrixone/pkg/vm/process"
    23  	"github.com/stretchr/testify/require"
    24  	"testing"
    25  )
    26  
    27  const nullFlag = int64(-65535)
    28  
    29  func TestSamplePool(t *testing.T) {
    30  	proc := testutil.NewProcess()
    31  
    32  	// data source :
    33  	// (1, 1), (2, 2), (3, 3), (3, 4), (4, 4),
    34  	// (5, 5), (6, 6), (6, 6), (7, 7), (8, null).
    35  	rows1 := [][]int64{
    36  		{1, 1},
    37  		{2, 2},
    38  		{3, 3},
    39  		{3, 4},
    40  		{4, 4},
    41  	}
    42  	row2 := [][]int64{
    43  		{5, 5},
    44  		{6, 6},
    45  		{6, 6},
    46  		{7, 7},
    47  		{8, nullFlag},
    48  	}
    49  	b1, e1 := genSampleBatch(proc, rows1)
    50  	require.NoError(t, e1)
    51  	b2, e2 := genSampleBatch(proc, row2)
    52  	require.NoError(t, e2)
    53  
    54  	{
    55  		// sample 5 rows by second column.
    56  		pool1 := newSamplePoolByRows(proc, 5, 1, true)
    57  		err := pool1.sampleFromColumn(1, b1.Vecs[1], b1)
    58  		require.NoError(t, err)
    59  		err = pool1.sampleFromColumn(1, b2.Vecs[1], b2)
    60  		require.NoError(t, err)
    61  
    62  		// cannot get any result before end.
    63  		tbat, err := pool1.Result(false)
    64  		require.NoError(t, err)
    65  		require.Equal(t, 0, tbat.RowCount())
    66  
    67  		// check the result.
    68  		// due to reorder, the result will be [sample column, normal column, rowsCount column].
    69  		out, err := pool1.Result(true)
    70  		require.NoError(t, err)
    71  		require.Equal(t, 3, len(out.Vecs))
    72  		require.Equal(t, 5, out.Vecs[0].Length())
    73  		require.Equal(t, 5, out.Vecs[1].Length())
    74  		// invalid scan row count was 9. the 10th row with null value at the sample column will be ignored.
    75  		require.Equal(t, int64(9), vector.GetFixedAt[int64](out.Vecs[2], 0))
    76  
    77  		out.Clean(proc.Mp())
    78  		pool1.Free()
    79  	}
    80  
    81  	{
    82  		// sample 5 rows by 2 columns.
    83  		pool2 := newSamplePoolByRows(proc, 5, 2, false)
    84  		err := pool2.sampleFromColumns(1, b1.Vecs, b1)
    85  		require.NoError(t, err)
    86  		err = pool2.sampleFromColumns(1, b2.Vecs, b2)
    87  		require.NoError(t, err)
    88  
    89  		tbat, err := pool2.Result(false)
    90  		require.NoError(t, err)
    91  		require.Equal(t, 0, tbat.RowCount())
    92  
    93  		out, err := pool2.Result(true)
    94  		require.NoError(t, err)
    95  		// due to we set outputRowCount to false, the result will be [sample column, normal column].
    96  		require.Equal(t, 2, len(out.Vecs))
    97  		require.Equal(t, 5, out.Vecs[0].Length())
    98  		require.Equal(t, 5, out.Vecs[1].Length())
    99  
   100  		out.Clean(proc.Mp())
   101  		pool2.Free()
   102  	}
   103  
   104  	{
   105  		// sample 100 % rows by the second column.
   106  		pool3 := newSamplePoolByPercent(proc, 100.0, 1)
   107  		err := pool3.sampleFromColumn(1, b1.Vecs[1], b1)
   108  		require.NoError(t, err)
   109  
   110  		// can take out the result before an end.
   111  		tbat, err := pool3.Result(false)
   112  		require.NoError(t, err)
   113  		require.Equal(t, 5, tbat.RowCount())
   114  
   115  		err = pool3.sampleFromColumn(1, b2.Vecs[1], b2)
   116  		require.NoError(t, err)
   117  
   118  		out, err := pool3.Result(true)
   119  		require.NoError(t, err)
   120  		require.Equal(t, 4, out.RowCount())
   121  
   122  		tbat.Clean(proc.Mp())
   123  		out.Clean(proc.Mp())
   124  		pool3.Free()
   125  	}
   126  
   127  	proc.FreeVectors()
   128  	b1.Clean(proc.Mp())
   129  	b2.Clean(proc.Mp())
   130  	require.Equal(t, int64(0), proc.Mp().CurrNB())
   131  }
   132  
   133  func genSampleBatch(proc *process.Process, rows [][]int64) (*batch.Batch, error) {
   134  	b := batch.NewWithSize(len(rows[0]))
   135  
   136  	var err error
   137  	for i := range b.Vecs {
   138  		b.Vecs[i] = proc.GetVector(types.T_int64.ToType())
   139  
   140  		for _, rowValue := range rows {
   141  			err = vector.AppendFixed[int64](b.Vecs[i], rowValue[i], rowValue[i] == nullFlag, proc.Mp())
   142  			if err != nil {
   143  				return nil, err
   144  			}
   145  		}
   146  	}
   147  	b.SetRowCount(len(rows))
   148  	return b, nil
   149  }
   150  
   151  func TestSamplePoolOthers(t *testing.T) {
   152  	// merge sample and sample by percent cannot be tested full.
   153  	s1 := newSamplePoolByRows(nil, 1, 1, false)
   154  	s2 := newSamplePoolByPercent(nil, 1.0, 1)
   155  	s3 := newSamplePoolByRowsForMerge(nil, 1, 1, false)
   156  
   157  	s1.setPerfFields(false)
   158  	s2.setPerfFields(false)
   159  	s3.setPerfFields(false)
   160  	require.Equal(t, true, s1.canCheckFull)
   161  	require.Equal(t, false, s2.canCheckFull)
   162  	require.Equal(t, false, s3.canCheckFull)
   163  
   164  	// once sample for each group, full check is not supported.
   165  	s1.setPerfFields(true)
   166  	s2.setPerfFields(true)
   167  	s3.setPerfFields(true)
   168  	require.Equal(t, false, s1.canCheckFull)
   169  	require.Equal(t, false, s2.canCheckFull)
   170  	require.Equal(t, false, s3.canCheckFull)
   171  }