github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/colexec/aggexec/serialize.go (about)

     1  // Copyright 2024 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package aggexec
    16  
    17  import (
    18  	hll "github.com/axiomhq/hyperloglog"
    19  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    20  	"github.com/matrixorigin/matrixone/pkg/common/mpool"
    21  	"github.com/matrixorigin/matrixone/pkg/container/types"
    22  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    23  	"github.com/matrixorigin/matrixone/pkg/sql/colexec/aggexec/algos/kmeans"
    24  )
    25  
    26  func MarshalAggFuncExec(exec AggFuncExec) ([]byte, error) {
    27  	if exec.IsDistinct() {
    28  		return nil, moerr.NewInternalErrorNoCtx("marshal distinct agg exec is not supported")
    29  	}
    30  	return exec.marshal()
    31  }
    32  
    33  func UnmarshalAggFuncExec(
    34  	mg AggMemoryManager,
    35  	data []byte) (AggFuncExec, error) {
    36  	encoded := &EncodedAgg{}
    37  	if err := encoded.Unmarshal(data); err != nil {
    38  		return nil, err
    39  	}
    40  
    41  	info := encoded.GetInfo()
    42  
    43  	exec := MakeAgg(mg, info.Id, info.IsDistinct, info.Args...)
    44  
    45  	if encoded.GetExecType() == EncodedAggExecType_special_group_concat {
    46  		if len(encoded.Groups) > 0 && len(encoded.Groups[0]) > 0 {
    47  			exec.(*groupConcatExec).separator = encoded.Groups[0]
    48  		}
    49  	}
    50  
    51  	var mp *mpool.MPool = nil
    52  	if mg != nil {
    53  		mp = mg.Mp()
    54  	}
    55  
    56  	if err := exec.unmarshal(mp, encoded.Result, encoded.Groups); err != nil {
    57  		exec.Free()
    58  		return nil, err
    59  	}
    60  	return exec, nil
    61  }
    62  
    63  var _ = CopyAggFuncExec
    64  
    65  func CopyAggFuncExec(mg AggMemoryManager, exec AggFuncExec) (AggFuncExec, error) {
    66  	bs, err := MarshalAggFuncExec(exec)
    67  	if err != nil {
    68  		return nil, err
    69  	}
    70  	return UnmarshalAggFuncExec(mg, bs)
    71  }
    72  
    73  func (exec *singleAggFuncExec1[from, to]) marshal() ([]byte, error) {
    74  	d := exec.singleAggInfo.getEncoded()
    75  	r, err := exec.ret.marshal()
    76  	if err != nil {
    77  		return nil, err
    78  	}
    79  	encoded := &EncodedAgg{
    80  		ExecType: EncodedAggExecType_single_fixed_fixed,
    81  		Info:     d,
    82  		Result:   r,
    83  	}
    84  	if len(exec.groups) > 0 {
    85  		encoded.Groups = make([][]byte, len(exec.groups))
    86  		for i := range encoded.Groups {
    87  			encoded.Groups[i] = exec.groups[i].Marshal()
    88  		}
    89  	}
    90  	return encoded.Marshal()
    91  }
    92  
    93  func (exec *singleAggFuncExec1[from, to]) unmarshal(mp *mpool.MPool, result []byte, groups [][]byte) error {
    94  	exec.groups = make([]SingleAggFromFixedRetFixed[from, to], len(groups))
    95  	for i := range exec.groups {
    96  		exec.groups[i] = exec.gGroup()
    97  		exec.groups[i].Unmarshal(groups[i])
    98  	}
    99  	return exec.ret.unmarshal(result)
   100  }
   101  
   102  func (exec *singleAggFuncExec2[from]) marshal() ([]byte, error) {
   103  	d := exec.singleAggInfo.getEncoded()
   104  	r, err := exec.ret.marshal()
   105  	if err != nil {
   106  		return nil, err
   107  	}
   108  	encoded := &EncodedAgg{
   109  		ExecType: EncodedAggExecType_single_fixed_fixed,
   110  		Info:     d,
   111  		Result:   r,
   112  	}
   113  	if len(exec.groups) > 0 {
   114  		encoded.Groups = make([][]byte, len(exec.groups))
   115  		for i := range encoded.Groups {
   116  			encoded.Groups[i] = exec.groups[i].Marshal()
   117  		}
   118  	}
   119  	return encoded.Marshal()
   120  }
   121  
   122  func (exec *singleAggFuncExec2[from]) unmarshal(mp *mpool.MPool, result []byte, groups [][]byte) error {
   123  	exec.groups = make([]SingleAggFromFixedRetVar[from], len(groups))
   124  	for i := range exec.groups {
   125  		exec.groups[i] = exec.gGroup()
   126  		exec.groups[i].Unmarshal(groups[i])
   127  	}
   128  	return exec.ret.unmarshal(result)
   129  }
   130  
   131  func (exec *singleAggFuncExec3[to]) marshal() ([]byte, error) {
   132  	d := exec.singleAggInfo.getEncoded()
   133  	r, err := exec.ret.marshal()
   134  	if err != nil {
   135  		return nil, err
   136  	}
   137  	encoded := &EncodedAgg{
   138  		ExecType: EncodedAggExecType_single_fixed_fixed,
   139  		Info:     d,
   140  		Result:   r,
   141  	}
   142  	if len(exec.groups) > 0 {
   143  		encoded.Groups = make([][]byte, len(exec.groups))
   144  		for i := range encoded.Groups {
   145  			encoded.Groups[i] = exec.groups[i].Marshal()
   146  		}
   147  	}
   148  	return encoded.Marshal()
   149  }
   150  
   151  func (exec *singleAggFuncExec3[to]) unmarshal(mp *mpool.MPool, result []byte, groups [][]byte) error {
   152  	exec.groups = make([]SingleAggFromVarRetFixed[to], len(groups))
   153  	for i := range exec.groups {
   154  		exec.groups[i] = exec.gGroup()
   155  		exec.groups[i].Unmarshal(groups[i])
   156  	}
   157  	return exec.ret.unmarshal(result)
   158  }
   159  
   160  func (exec *singleAggFuncExec4) marshal() ([]byte, error) {
   161  	d := exec.singleAggInfo.getEncoded()
   162  	r, err := exec.ret.marshal()
   163  	if err != nil {
   164  		return nil, err
   165  	}
   166  	encoded := &EncodedAgg{
   167  		ExecType: EncodedAggExecType_single_fixed_fixed,
   168  		Info:     d,
   169  		Result:   r,
   170  	}
   171  	if len(exec.groups) > 0 {
   172  		encoded.Groups = make([][]byte, len(exec.groups))
   173  		for i := range encoded.Groups {
   174  			encoded.Groups[i] = exec.groups[i].Marshal()
   175  		}
   176  	}
   177  	return encoded.Marshal()
   178  }
   179  
   180  func (exec *singleAggFuncExec4) unmarshal(mp *mpool.MPool, result []byte, groups [][]byte) error {
   181  	exec.groups = make([]SingleAggFromVarRetVar, len(groups))
   182  	for i := range exec.groups {
   183  		exec.groups[i] = exec.gGroup()
   184  		exec.groups[i].Unmarshal(groups[i])
   185  	}
   186  	return exec.ret.unmarshal(result)
   187  }
   188  
   189  func (exec *multiAggFuncExec1[to]) marshal() ([]byte, error) {
   190  	d := exec.multiAggInfo.getEncoded()
   191  	r, err := exec.ret.marshal()
   192  	if err != nil {
   193  		return nil, err
   194  	}
   195  	encoded := &EncodedAgg{
   196  		ExecType: EncodedAggExecType_multi_return_fixed,
   197  		Info:     d,
   198  		Result:   r,
   199  	}
   200  	if len(exec.groups) > 0 {
   201  		encoded.Groups = make([][]byte, len(exec.groups))
   202  		for i := range encoded.Groups {
   203  			encoded.Groups[i] = exec.groups[i].Marshal()
   204  		}
   205  	}
   206  	return encoded.Marshal()
   207  }
   208  
   209  func (exec *multiAggFuncExec1[T]) unmarshal(mp *mpool.MPool, result []byte, groups [][]byte) error {
   210  	exec.groups = make([]MultiAggRetFixed[T], len(groups))
   211  	for i := range exec.groups {
   212  		exec.groups[i] = exec.gGroup()
   213  		exec.groups[i].Unmarshal(groups[i])
   214  	}
   215  	return exec.ret.unmarshal(result)
   216  }
   217  
   218  func (exec *multiAggFuncExec2) marshal() ([]byte, error) {
   219  	d := exec.multiAggInfo.getEncoded()
   220  	r, err := exec.ret.marshal()
   221  	if err != nil {
   222  		return nil, err
   223  	}
   224  	encoded := &EncodedAgg{
   225  		ExecType: EncodedAggExecType_multi_return_fixed,
   226  		Info:     d,
   227  		Result:   r,
   228  	}
   229  	if len(exec.groups) > 0 {
   230  		encoded.Groups = make([][]byte, len(exec.groups))
   231  		for i := range encoded.Groups {
   232  			encoded.Groups[i] = exec.groups[i].Marshal()
   233  		}
   234  	}
   235  	return encoded.Marshal()
   236  }
   237  
   238  func (exec *multiAggFuncExec2) unmarshal(mp *mpool.MPool, result []byte, groups [][]byte) error {
   239  	exec.groups = make([]MultiAggRetVar, len(groups))
   240  	for i := range exec.groups {
   241  		exec.groups[i] = exec.gGroup()
   242  		exec.groups[i].Unmarshal(groups[i])
   243  	}
   244  	return exec.ret.unmarshal(result)
   245  }
   246  
   247  func (exec *groupConcatExec) marshal() ([]byte, error) {
   248  	d := exec.multiAggInfo.getEncoded()
   249  	r, err := exec.ret.marshal()
   250  	if err != nil {
   251  		return nil, err
   252  	}
   253  	encoded := &EncodedAgg{
   254  		ExecType: EncodedAggExecType_special_group_concat,
   255  		Info:     d,
   256  		Result:   r,
   257  		Groups:   [][]byte{exec.separator},
   258  	}
   259  	return encoded.Marshal()
   260  }
   261  
   262  func (exec *groupConcatExec) unmarshal(mp *mpool.MPool, result []byte, groups [][]byte) error {
   263  	if err := exec.SetExtraInformation(groups[0], 0); err != nil {
   264  		return err
   265  	}
   266  	return exec.ret.unmarshal(result)
   267  }
   268  
   269  func (exec *countColumnExec) marshal() ([]byte, error) {
   270  	d := exec.singleAggInfo.getEncoded()
   271  	r, err := exec.ret.marshal()
   272  	if err != nil {
   273  		return nil, err
   274  	}
   275  	encoded := &EncodedAgg{
   276  		ExecType: EncodedAggExecType_special_count_column,
   277  		Info:     d,
   278  		Result:   r,
   279  		Groups:   nil,
   280  	}
   281  	return encoded.Marshal()
   282  }
   283  
   284  func (exec *countColumnExec) unmarshal(mp *mpool.MPool, result []byte, groups [][]byte) error {
   285  	return exec.ret.unmarshal(result)
   286  }
   287  
   288  func (exec *countStarExec) marshal() ([]byte, error) {
   289  	d := exec.singleAggInfo.getEncoded()
   290  	r, err := exec.ret.marshal()
   291  	if err != nil {
   292  		return nil, err
   293  	}
   294  	encoded := &EncodedAgg{
   295  		ExecType: EncodedAggExecType_special_count_star,
   296  		Info:     d,
   297  		Result:   r,
   298  		Groups:   nil,
   299  	}
   300  	return encoded.Marshal()
   301  }
   302  
   303  func (exec *countStarExec) unmarshal(mp *mpool.MPool, result []byte, groups [][]byte) error {
   304  	return exec.ret.unmarshal(result)
   305  }
   306  
   307  func (exec *approxCountFixedExec[T]) marshal() ([]byte, error) {
   308  	d := exec.singleAggInfo.getEncoded()
   309  	r, err := exec.ret.marshal()
   310  	if err != nil {
   311  		return nil, err
   312  	}
   313  
   314  	encoded := &EncodedAgg{
   315  		ExecType: EncodedAggExecType_special_approx_count,
   316  		Info:     d,
   317  		Result:   r,
   318  		Groups:   nil,
   319  	}
   320  	if len(exec.groups) > 0 {
   321  		encoded.Groups = make([][]byte, len(exec.groups))
   322  		for i := range encoded.Groups {
   323  			encoded.Groups[i], err = exec.groups[i].MarshalBinary()
   324  			if err != nil {
   325  				return nil, err
   326  			}
   327  		}
   328  	}
   329  	return encoded.Marshal()
   330  }
   331  
   332  func (exec *approxCountFixedExec[T]) unmarshal(mp *mpool.MPool, result []byte, groups [][]byte) error {
   333  	err := exec.ret.unmarshal(result)
   334  	if err != nil {
   335  		return err
   336  	}
   337  	if len(groups) > 0 {
   338  		exec.groups = make([]*hll.Sketch, len(groups))
   339  		for i := range exec.groups {
   340  			exec.groups[i] = hll.New()
   341  			if err = exec.groups[i].UnmarshalBinary(groups[i]); err != nil {
   342  				return err
   343  			}
   344  		}
   345  	}
   346  	return nil
   347  }
   348  
   349  func (exec *approxCountVarExec) marshal() ([]byte, error) {
   350  	d := exec.singleAggInfo.getEncoded()
   351  	r, err := exec.ret.marshal()
   352  	if err != nil {
   353  		return nil, err
   354  	}
   355  
   356  	encoded := &EncodedAgg{
   357  		ExecType: EncodedAggExecType_special_approx_count,
   358  		Info:     d,
   359  		Result:   r,
   360  		Groups:   nil,
   361  	}
   362  	if len(exec.groups) > 0 {
   363  		encoded.Groups = make([][]byte, len(exec.groups))
   364  		for i := range encoded.Groups {
   365  			encoded.Groups[i], err = exec.groups[i].MarshalBinary()
   366  			if err != nil {
   367  				return nil, err
   368  			}
   369  		}
   370  	}
   371  	return encoded.Marshal()
   372  }
   373  
   374  func (exec *approxCountVarExec) unmarshal(mp *mpool.MPool, result []byte, groups [][]byte) error {
   375  	err := exec.ret.unmarshal(result)
   376  	if err != nil {
   377  		return err
   378  	}
   379  	if len(groups) > 0 {
   380  		exec.groups = make([]*hll.Sketch, len(groups))
   381  		for i := range exec.groups {
   382  			exec.groups[i] = hll.New()
   383  			if err = exec.groups[i].UnmarshalBinary(groups[i]); err != nil {
   384  				return err
   385  			}
   386  		}
   387  	}
   388  	return nil
   389  }
   390  
   391  func (exec *medianColumnExecSelf[T, R]) marshal() ([]byte, error) {
   392  	d := exec.singleAggInfo.getEncoded()
   393  	r, err := exec.ret.marshal()
   394  	if err != nil {
   395  		return nil, err
   396  	}
   397  
   398  	encoded := &EncodedAgg{
   399  		ExecType: EncodedAggExecType_special_median,
   400  		Info:     d,
   401  		Result:   r,
   402  		Groups:   nil,
   403  	}
   404  	if len(exec.groups) > 0 {
   405  		encoded.Groups = make([][]byte, len(exec.groups))
   406  		for i := range encoded.Groups {
   407  			if encoded.Groups[i], err = exec.groups[i].MarshalBinary(); err != nil {
   408  				return nil, err
   409  			}
   410  		}
   411  	}
   412  	return encoded.Marshal()
   413  }
   414  
   415  func (exec *medianColumnExecSelf[T, R]) unmarshal(mp *mpool.MPool, result []byte, groups [][]byte) error {
   416  	if len(groups) > 0 {
   417  		exec.groups = make([]*vector.Vector, len(groups))
   418  		for i := range exec.groups {
   419  			exec.groups[i] = vector.NewVec(exec.singleAggInfo.argType)
   420  			if err := vectorUnmarshal(exec.groups[i], groups[i], mp); err != nil {
   421  				return err
   422  			}
   423  		}
   424  	}
   425  	return exec.ret.unmarshal(result)
   426  }
   427  
   428  func (exec *clusterCentersExec) marshal() ([]byte, error) {
   429  	d := exec.singleAggInfo.getEncoded()
   430  	r, err := exec.ret.marshal()
   431  	if err != nil {
   432  		return nil, err
   433  	}
   434  
   435  	encoded := &EncodedAgg{
   436  		ExecType: EncodedAggExecType_special_cluster_center,
   437  		Info:     d,
   438  		Result:   r,
   439  		Groups:   nil,
   440  	}
   441  
   442  	encoded.Groups = make([][]byte, len(exec.groupData)+1)
   443  	if len(exec.groupData) > 0 {
   444  		for i := range exec.groupData {
   445  			if encoded.Groups[i], err = exec.groupData[i].MarshalBinary(); err != nil {
   446  				return nil, err
   447  			}
   448  		}
   449  	}
   450  
   451  	{
   452  		t1 := uint16(exec.distType)
   453  		t2 := uint16(exec.initType)
   454  
   455  		bs := types.EncodeUint64(&exec.clusterCnt)
   456  		bs = append(bs, types.EncodeUint16(&t1)...)
   457  		bs = append(bs, types.EncodeUint16(&t2)...)
   458  		bs = append(bs, types.EncodeBool(&exec.normalize)...)
   459  		encoded.Groups[len(encoded.Groups)-1] = bs
   460  	}
   461  	return encoded.Marshal()
   462  }
   463  
   464  func (exec *clusterCentersExec) unmarshal(mp *mpool.MPool, result []byte, groups [][]byte) error {
   465  	if err := exec.ret.unmarshal(result); err != nil {
   466  		return err
   467  	}
   468  	if len(groups) > 0 {
   469  		exec.groupData = make([]*vector.Vector, len(groups)-1)
   470  		for i := range exec.groupData {
   471  			exec.groupData[i] = vector.NewVec(exec.singleAggInfo.argType)
   472  			if err := vectorUnmarshal(exec.groupData[i], groups[i], mp); err != nil {
   473  				return err
   474  			}
   475  		}
   476  		bs := groups[len(groups)-1]
   477  		if len(bs) != 13 { // 8+2+2+1
   478  			return moerr.NewInternalErrorNoCtx("invalid cluster center exec data")
   479  		}
   480  		exec.clusterCnt = types.DecodeUint64(bs[:8])
   481  		exec.distType = kmeans.DistanceType(types.DecodeUint16(bs[8:10]))
   482  		exec.initType = kmeans.InitType(types.DecodeUint16(bs[10:12]))
   483  		exec.normalize = types.DecodeBool(bs[12:])
   484  	}
   485  	return nil
   486  }