github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/colexec/aggexec/serialize_test.go (about)

     1  // Copyright 2024 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package aggexec
    16  
    17  import (
    18  	"fmt"
    19  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    20  	"github.com/matrixorigin/matrixone/pkg/container/types"
    21  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    22  	"github.com/stretchr/testify/require"
    23  	"testing"
    24  )
    25  
    26  func testAggExecSerialize(exec AggFuncExec, checkFn func(src, dst AggFuncExec) error) error {
    27  	ds, marshalErr := MarshalAggFuncExec(exec)
    28  	if marshalErr != nil {
    29  		return marshalErr
    30  	}
    31  	newExec, unmarshalErr := UnmarshalAggFuncExec(nil, ds)
    32  	if unmarshalErr != nil {
    33  		return unmarshalErr
    34  	}
    35  
    36  	if checkFn == nil {
    37  		return nil
    38  	}
    39  	return checkFn(exec, newExec)
    40  }
    41  
    42  func fillTestData(mg AggMemoryManager, groupNumber int, exec AggFuncExec, dataType types.Type) error {
    43  	if err := exec.GroupGrow(groupNumber); err != nil {
    44  		return err
    45  	}
    46  
    47  	vec := vector.NewVec(dataType)
    48  	switch dataType.Oid {
    49  	case types.T_int32:
    50  		values := make([]int32, groupNumber)
    51  		for i := 0; i < groupNumber; i++ {
    52  			values[i] = int32(i)
    53  		}
    54  		if err := vector.AppendFixedList[int32](vec, values, nil, mg.Mp()); err != nil {
    55  			return err
    56  		}
    57  
    58  	case types.T_varchar:
    59  		values := make([][]byte, groupNumber)
    60  		for i := 0; i < groupNumber; i++ {
    61  			values[i] = []byte(fmt.Sprintf("%d", i))
    62  		}
    63  		if err := vector.AppendBytesList(vec, values, nil, mg.Mp()); err != nil {
    64  			return err
    65  		}
    66  
    67  	case types.T_array_float32:
    68  		values := make([][]float32, groupNumber)
    69  		for i := 0; i < groupNumber; i++ {
    70  			values[i] = []float32{float32(i), float32(i + 1), float32(i + 2)}
    71  		}
    72  		if err := vector.AppendArrayList[float32](vec, values, nil, mg.Mp()); err != nil {
    73  			return err
    74  		}
    75  
    76  	default:
    77  		return moerr.NewInternalErrorNoCtx("agg exec ut failed: unsupported data type")
    78  	}
    79  
    80  	inputs := []*vector.Vector{vec}
    81  	for i := 0; i < groupNumber; i++ {
    82  		if err := exec.BulkFill(i, inputs); err != nil {
    83  			vec.Free(mg.Mp())
    84  			return err
    85  		}
    86  	}
    87  	vec.Free(mg.Mp())
    88  	return nil
    89  }
    90  
    91  func TestSerial_SingleAggFuncExecSerial(t *testing.T) {
    92  	mg := newTestAggMemoryManager()
    93  
    94  	info := singleAggInfo{
    95  		aggID:     gUniqueAggIdForTest(),
    96  		distinct:  false,
    97  		argType:   types.T_int32.ToType(),
    98  		retType:   types.T_int64.ToType(),
    99  		emptyNull: true,
   100  	}
   101  	RegisterSingleAggFromFixedToFixed(
   102  		MakeSingleAgg1RegisteredInfo(
   103  			MakeSingleColumnAggInformation(info.aggID, info.argType, tSinglePrivate1Ret, true, info.emptyNull),
   104  			gTestSingleAggPrivateSer1,
   105  			nil, fillSinglePrivate1, fillNullSinglePrivate1, fillsSinglePrivate1, mergeSinglePrivate1, nil))
   106  
   107  	// methods to check the correctness of the serialized AggFuncExec.
   108  	checkFn := func(src, dst AggFuncExec) error {
   109  		s1, ok1 := src.(*singleAggFuncExec1[int32, int64])
   110  		s2, ok2 := dst.(*singleAggFuncExec1[int32, int64])
   111  		if !ok1 || !ok2 {
   112  			return moerr.NewInternalErrorNoCtx("type assertion failed")
   113  		}
   114  
   115  		if !s1.singleAggInfo.eq(s2.singleAggInfo) {
   116  			return moerr.NewInternalErrorNoCtx("singleAggInfo not equal.")
   117  		}
   118  
   119  		if !s1.ret.eq(s2.ret) {
   120  			return moerr.NewInternalErrorNoCtx("ret not equal.")
   121  		}
   122  		return nil
   123  	}
   124  
   125  	{
   126  		executor := MakeAgg(
   127  			mg,
   128  			info.aggID, info.distinct, info.argType)
   129  		require.NoError(t, fillTestData(mg, 10, executor, info.argType))
   130  		require.NoError(t, testAggExecSerialize(executor, checkFn))
   131  		executor.Free()
   132  	}
   133  
   134  	require.Equal(t, int64(0), mg.Mp().CurrNB())
   135  }
   136  
   137  func TestSerial_CountColumnAggFuncExec(t *testing.T) {
   138  	mg := newTestAggMemoryManager()
   139  
   140  	info := singleAggInfo{
   141  		aggID:     gUniqueAggIdForTest(),
   142  		distinct:  false,
   143  		argType:   types.T_int32.ToType(),
   144  		retType:   CountReturnType(nil),
   145  		emptyNull: false,
   146  	}
   147  	RegisterCountColumnAgg(info.aggID)
   148  
   149  	// methods to check the correctness of the serialized AggFuncExec.
   150  	checkFn := func(src, dst AggFuncExec) error {
   151  		s1, ok1 := src.(*countColumnExec)
   152  		s2, ok2 := dst.(*countColumnExec)
   153  		if !ok1 || !ok2 {
   154  			return moerr.NewInternalErrorNoCtx("type assertion failed")
   155  		}
   156  
   157  		if !s1.singleAggInfo.eq(s2.singleAggInfo) {
   158  			return moerr.NewInternalErrorNoCtx("singleAggInfo not equal.")
   159  		}
   160  
   161  		if !s1.ret.eq(s2.ret) {
   162  			return moerr.NewInternalErrorNoCtx("ret not equal.")
   163  		}
   164  		return nil
   165  	}
   166  
   167  	{
   168  		executor := MakeAgg(
   169  			mg,
   170  			info.aggID, info.distinct, info.argType)
   171  		require.NoError(t, fillTestData(mg, 10, executor, info.argType))
   172  		require.NoError(t, testAggExecSerialize(executor, checkFn))
   173  		executor.Free()
   174  	}
   175  
   176  	require.Equal(t, int64(0), mg.Mp().CurrNB())
   177  }
   178  
   179  func TestSerial_CountStarAggFuncExec(t *testing.T) {
   180  	mg := newTestAggMemoryManager()
   181  
   182  	info := singleAggInfo{
   183  		aggID:     gUniqueAggIdForTest(),
   184  		distinct:  false,
   185  		argType:   types.T_int32.ToType(),
   186  		retType:   CountReturnType(nil),
   187  		emptyNull: false,
   188  	}
   189  	RegisterCountStarAgg(info.aggID)
   190  
   191  	// methods to check the correctness of the serialized AggFuncExec.
   192  	checkFn := func(src, dst AggFuncExec) error {
   193  		s1, ok1 := src.(*countStarExec)
   194  		s2, ok2 := dst.(*countStarExec)
   195  		if !ok1 || !ok2 {
   196  			return moerr.NewInternalErrorNoCtx("type assertion failed")
   197  		}
   198  
   199  		if !s1.singleAggInfo.eq(s2.singleAggInfo) {
   200  			return moerr.NewInternalErrorNoCtx("singleAggInfo not equal.")
   201  		}
   202  
   203  		if !s1.ret.eq(s2.ret) {
   204  			return moerr.NewInternalErrorNoCtx("ret not equal.")
   205  		}
   206  		return nil
   207  	}
   208  
   209  	{
   210  		executor := MakeAgg(
   211  			mg,
   212  			info.aggID, info.distinct, info.argType)
   213  		require.NoError(t, fillTestData(mg, 10, executor, info.argType))
   214  		require.NoError(t, testAggExecSerialize(executor, checkFn))
   215  		executor.Free()
   216  	}
   217  
   218  	require.Equal(t, int64(0), mg.Mp().CurrNB())
   219  }
   220  
   221  func TestSerial_ApproxCountFixedAggFuncExec(t *testing.T) {
   222  	mg := newTestAggMemoryManager()
   223  
   224  	info := singleAggInfo{
   225  		aggID:     gUniqueAggIdForTest(),
   226  		distinct:  false,
   227  		argType:   types.T_int32.ToType(),
   228  		retType:   types.T_uint64.ToType(),
   229  		emptyNull: false,
   230  	}
   231  	RegisterApproxCountAgg(info.aggID)
   232  
   233  	// methods to check the correctness of the serialized AggFuncExec.
   234  	checkFn := func(src, dst AggFuncExec) error {
   235  		s1, ok1 := src.(*approxCountFixedExec[int32])
   236  		s2, ok2 := dst.(*approxCountFixedExec[int32])
   237  		if !ok1 || !ok2 {
   238  			return moerr.NewInternalErrorNoCtx("type assertion failed")
   239  		}
   240  
   241  		if !s1.singleAggInfo.eq(s2.singleAggInfo) {
   242  			return moerr.NewInternalErrorNoCtx("singleAggInfo not equal.")
   243  		}
   244  
   245  		if !s1.ret.eq(s2.ret) {
   246  			return moerr.NewInternalErrorNoCtx("ret not equal.")
   247  		}
   248  
   249  		if len(s1.groups) == len(s2.groups) {
   250  			for i := 0; i < len(s1.groups); i++ {
   251  				if s1.groups[i] == nil {
   252  					continue
   253  				}
   254  
   255  				if s2.groups[i] == nil {
   256  					return moerr.NewInternalErrorNoCtx("groups not equal.")
   257  				}
   258  			}
   259  
   260  			return nil
   261  		}
   262  		return moerr.NewInternalErrorNoCtx("groups not equal.")
   263  	}
   264  
   265  	{
   266  		executor := MakeAgg(
   267  			mg,
   268  			info.aggID, info.distinct, info.argType)
   269  		require.NoError(t, fillTestData(mg, 10, executor, info.argType))
   270  		require.NoError(t, testAggExecSerialize(executor, checkFn))
   271  		executor.Free()
   272  	}
   273  
   274  	require.Equal(t, int64(0), mg.Mp().CurrNB())
   275  }
   276  
   277  func TestSerial_ApproxCountVarAggFuncExec(t *testing.T) {
   278  	mg := newTestAggMemoryManager()
   279  
   280  	info := singleAggInfo{
   281  		aggID:     gUniqueAggIdForTest(),
   282  		distinct:  false,
   283  		argType:   types.T_varchar.ToType(),
   284  		retType:   types.T_uint64.ToType(),
   285  		emptyNull: false,
   286  	}
   287  	RegisterApproxCountAgg(info.aggID)
   288  
   289  	// methods to check the correctness of the serialized AggFuncExec.
   290  	checkFn := func(src, dst AggFuncExec) error {
   291  		s1, ok1 := src.(*approxCountVarExec)
   292  		s2, ok2 := dst.(*approxCountVarExec)
   293  		if !ok1 || !ok2 {
   294  			return moerr.NewInternalErrorNoCtx("type assertion failed")
   295  		}
   296  
   297  		if !s1.singleAggInfo.eq(s2.singleAggInfo) {
   298  			return moerr.NewInternalErrorNoCtx("singleAggInfo not equal.")
   299  		}
   300  
   301  		if !s1.ret.eq(s2.ret) {
   302  			return moerr.NewInternalErrorNoCtx("ret not equal.")
   303  		}
   304  
   305  		if len(s1.groups) == len(s2.groups) {
   306  			for i := 0; i < len(s1.groups); i++ {
   307  				if s1.groups[i] == nil {
   308  					continue
   309  				}
   310  
   311  				if s2.groups[i] == nil {
   312  					return moerr.NewInternalErrorNoCtx("groups not equal.")
   313  				}
   314  			}
   315  
   316  			return nil
   317  		}
   318  		return moerr.NewInternalErrorNoCtx("groups not equal.")
   319  	}
   320  
   321  	{
   322  		executor := MakeAgg(
   323  			mg,
   324  			info.aggID, info.distinct, info.argType)
   325  		require.NoError(t, fillTestData(mg, 10, executor, info.argType))
   326  		require.NoError(t, testAggExecSerialize(executor, checkFn))
   327  		executor.Free()
   328  	}
   329  
   330  	require.Equal(t, int64(0), mg.Mp().CurrNB())
   331  }
   332  
   333  func TestSerial_ClusterCentersExec(t *testing.T) {
   334  	mg := newTestAggMemoryManager()
   335  
   336  	info := singleAggInfo{
   337  		aggID:     gUniqueAggIdForTest(),
   338  		distinct:  false,
   339  		argType:   types.T_array_float32.ToType(),
   340  		retType:   ClusterCentersReturnType(nil),
   341  		emptyNull: false,
   342  	}
   343  	RegisterClusterCenters(info.aggID)
   344  
   345  	// methods to check the correctness of the serialized AggFuncExec.
   346  	checkFn := func(src, dst AggFuncExec) error {
   347  		s1, ok1 := src.(*clusterCentersExec)
   348  		s2, ok2 := dst.(*clusterCentersExec)
   349  		if !ok1 || !ok2 {
   350  			return moerr.NewInternalErrorNoCtx("type assertion failed")
   351  		}
   352  
   353  		if !s1.singleAggInfo.eq(s2.singleAggInfo) {
   354  			return moerr.NewInternalErrorNoCtx("singleAggInfo not equal.")
   355  		}
   356  
   357  		if !s1.ret.eq(s2.ret) {
   358  			return moerr.NewInternalErrorNoCtx("ret not equal.")
   359  		}
   360  
   361  		{
   362  			// kmeans parameters check.
   363  			ne1 := s1.clusterCnt != s2.clusterCnt
   364  			ne2 := s1.distType != s2.distType
   365  			ne3 := s1.initType != s2.initType
   366  			ne4 := s1.normalize != s2.normalize
   367  			if ne1 || ne2 || ne3 || ne4 {
   368  				return moerr.NewInternalErrorNoCtx("kmeans parameters not equal.")
   369  			}
   370  		}
   371  
   372  		if len(s1.groupData) == len(s2.groupData) {
   373  			for i := 0; i < len(s1.groupData); i++ {
   374  				if s1.groupData[i] == nil || s1.groupData[i].Length() == 0 {
   375  					continue
   376  				}
   377  
   378  				if l := s1.groupData[i].Length(); l != s2.groupData[i].Length() {
   379  					return moerr.NewInternalErrorNoCtx("groupData length not equal.")
   380  				}
   381  
   382  				vs1 := vector.MustArrayCol[float32](s1.groupData[i])
   383  				vs2 := vector.MustArrayCol[float32](s2.groupData[i])
   384  				for n := 0; n < len(vs1); n++ {
   385  					for m := 0; m < len(vs1[n]); m++ {
   386  						if vs1[n][m] != vs2[n][m] {
   387  							return moerr.NewInternalErrorNoCtx("groupData item not equal.")
   388  						}
   389  					}
   390  				}
   391  			}
   392  
   393  			return nil
   394  		}
   395  		return moerr.NewInternalErrorNoCtx("groupData not equal.")
   396  	}
   397  
   398  	{
   399  		executor := MakeAgg(
   400  			mg,
   401  			info.aggID, info.distinct, info.argType)
   402  		require.NoError(t, fillTestData(mg, 10, executor, info.argType))
   403  		require.NoError(t, testAggExecSerialize(executor, checkFn))
   404  		executor.Free()
   405  	}
   406  
   407  	require.Equal(t, int64(0), mg.Mp().CurrNB())
   408  }
   409  
   410  func TestSerial_MedianColumnExec(t *testing.T) {
   411  	mg := newTestAggMemoryManager()
   412  
   413  	info := singleAggInfo{
   414  		aggID:    gUniqueAggIdForTest(),
   415  		distinct: false,
   416  		argType:  types.T_int32.ToType(),
   417  	}
   418  	info.retType = MedianReturnType([]types.Type{info.argType})
   419  
   420  	RegisterMedian(info.aggID)
   421  
   422  	// methods to check the correctness of the serialized AggFuncExec.
   423  	checkFn := func(src, dst AggFuncExec) error {
   424  		s1, ok1 := src.(*medianColumnNumericExec[int32])
   425  		s2, ok2 := dst.(*medianColumnNumericExec[int32])
   426  		if !ok1 || !ok2 {
   427  			return moerr.NewInternalErrorNoCtx("type assertion failed")
   428  		}
   429  
   430  		if !s1.singleAggInfo.eq(s2.singleAggInfo) {
   431  			return moerr.NewInternalErrorNoCtx("singleAggInfo not equal.")
   432  		}
   433  
   434  		if !s1.ret.eq(s2.ret) {
   435  			return moerr.NewInternalErrorNoCtx("ret not equal.")
   436  		}
   437  
   438  		if len(s1.groups) == len(s2.groups) {
   439  			for i := 0; i < len(s1.groups); i++ {
   440  				if s1.groups[i] == nil || s1.groups[i].Length() == 0 {
   441  					continue
   442  				}
   443  
   444  				if l := s1.groups[i].Length(); l != s2.groups[i].Length() {
   445  					return moerr.NewInternalErrorNoCtx("groupData length not equal.")
   446  				}
   447  
   448  				vs1 := vector.MustFixedCol[int32](s1.groups[i])
   449  				vs2 := vector.MustFixedCol[int32](s2.groups[i])
   450  				for n := 0; n < len(vs1); n++ {
   451  					if vs1[n] != vs2[n] {
   452  						return moerr.NewInternalErrorNoCtx("groupData item not equal.")
   453  					}
   454  				}
   455  			}
   456  
   457  			return nil
   458  		}
   459  
   460  		return moerr.NewInternalErrorNoCtx("groupData not equal.")
   461  	}
   462  
   463  	{
   464  		executor := MakeAgg(
   465  			mg,
   466  			info.aggID, info.distinct, info.argType)
   467  		require.NoError(t, fillTestData(mg, 10, executor, info.argType))
   468  		require.NoError(t, testAggExecSerialize(executor, checkFn))
   469  		executor.Free()
   470  	}
   471  
   472  	require.Equal(t, int64(0), mg.Mp().CurrNB())
   473  }
   474  
   475  type testSingleAggPrivateSer1 struct {
   476  	testSingleAggPrivate1
   477  }
   478  
   479  func gTestSingleAggPrivateSer1() SingleAggFromFixedRetFixed[int32, int64] {
   480  	return &testSingleAggPrivateSer1{}
   481  }
   482  
   483  func (s *testSingleAggPrivateSer1) Marshal() []byte {
   484  	return []byte("testSingleAggPrivateSer1")
   485  }
   486  
   487  func (s *testSingleAggPrivateSer1) Unmarshal(bs []byte) {
   488  	if string(bs) != "testSingleAggPrivateSer1" {
   489  		panic("unmarshal failed")
   490  	}
   491  }
   492  
   493  // this test is to check if the agg framework can serialize and deserialize the private struct of the agg function.
   494  func TestSerial_Agg1(t *testing.T) {
   495  	mg := newTestAggMemoryManager()
   496  
   497  	info := singleAggInfo{
   498  		aggID:     gUniqueAggIdForTest(),
   499  		distinct:  false,
   500  		argType:   types.T_int32.ToType(),
   501  		retType:   types.T_int64.ToType(),
   502  		emptyNull: true,
   503  	}
   504  	RegisterSingleAggFromFixedToFixed(
   505  		MakeSingleAgg1RegisteredInfo(
   506  			MakeSingleColumnAggInformation(info.aggID, info.argType, tSinglePrivate1Ret, true, info.emptyNull),
   507  			gTestSingleAggPrivateSer1,
   508  			nil, fillSinglePrivate1, fillNullSinglePrivate1, fillsSinglePrivate1, mergeSinglePrivate1, nil))
   509  
   510  	{
   511  		executor := MakeAgg(
   512  			mg,
   513  			info.aggID, info.distinct, info.argType)
   514  		require.NoError(t, fillTestData(mg, 10, executor, info.argType))
   515  		require.NoError(t, testAggExecSerialize(executor, nil))
   516  		executor.Free()
   517  	}
   518  
   519  	require.Equal(t, int64(0), mg.Mp().CurrNB())
   520  }