github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/colexec/aggexec/concat.go (about)

     1  // Copyright 2024 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package aggexec
    16  
    17  import (
    18  	"fmt"
    19  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    20  	"github.com/matrixorigin/matrixone/pkg/container/types"
    21  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    22  	"math"
    23  )
    24  
    25  const (
    26  	groupConcatMaxLen = 1024
    27  )
    28  
// group_concat is a special string aggregation function.
//
// groupConcatExec is its executor: for every group it keeps one byte string
// made of the formatted argument values of each accepted row, with the
// separator written between rows.
type groupConcatExec struct {
	// multiAggInfo carries the aggregation description used here: argument
	// types, return type, and the distinct / emptyNull flags.
	multiAggInfo
	// ret stores the per-group concatenated value; it is read with aggGet and
	// written with aggSet after groupToSet has been pointed at the group.
	ret aggFuncBytesResult
	// distinctHash is only initialised by newGroupConcatExec when the
	// aggregation is DISTINCT; Fill consults it to skip duplicate rows.
	distinctHash

	// separator is appended between the contributions of consecutive rows of
	// the same group. It can be replaced through SetExtraInformation.
	separator []byte
}
    37  
    38  func GroupConcatReturnType(args []types.Type) types.Type {
    39  	for _, p := range args {
    40  		if p.Oid == types.T_binary || p.Oid == types.T_varbinary || p.Oid == types.T_blob {
    41  			return types.T_blob.ToType()
    42  		}
    43  	}
    44  	return types.T_text.ToType()
    45  }
    46  
    47  func newGroupConcatExec(mg AggMemoryManager, info multiAggInfo, separator string) AggFuncExec {
    48  	exec := &groupConcatExec{
    49  		multiAggInfo: info,
    50  		ret:          initBytesAggFuncResult(mg, info.retType, info.emptyNull),
    51  		separator:    []byte(separator),
    52  	}
    53  	if info.distinct {
    54  		exec.distinctHash = newDistinctHash(mg.Mp(), false)
    55  	}
    56  	return exec
    57  }
    58  
    59  func isValidGroupConcatUnit(value []byte) error {
    60  	if len(value) > math.MaxUint16 {
    61  		return moerr.NewInternalErrorNoCtx("group_concat: the length of the value is too long")
    62  	}
    63  	return nil
    64  }
    65  
    66  func (exec *groupConcatExec) GroupGrow(more int) error {
    67  	if exec.IsDistinct() {
    68  		if err := exec.distinctHash.grows(more); err != nil {
    69  			return err
    70  		}
    71  	}
    72  	return exec.ret.grows(more)
    73  }
    74  
    75  func (exec *groupConcatExec) PreAllocateGroups(more int) error {
    76  	return exec.ret.preAllocate(more)
    77  }
    78  
    79  func (exec *groupConcatExec) Fill(groupIndex int, row int, vectors []*vector.Vector) error {
    80  	// if any value was null, there is no need to Fill.
    81  	u64Row := uint64(row)
    82  	for _, v := range vectors {
    83  		if v.IsNull(u64Row) {
    84  			return nil
    85  		}
    86  	}
    87  
    88  	if exec.IsDistinct() {
    89  		if need, err := exec.distinctHash.fill(groupIndex, vectors, row); err != nil || !need {
    90  			return err
    91  		}
    92  	}
    93  
    94  	exec.ret.groupToSet = groupIndex
    95  	exec.ret.setGroupNotEmpty(groupIndex)
    96  	r := exec.ret.aggGet()
    97  	if len(r) > groupConcatMaxLen {
    98  		return nil
    99  	}
   100  	if len(r) > 0 {
   101  		r = append(r, exec.separator...)
   102  	}
   103  
   104  	var err error
   105  	for i, v := range vectors {
   106  		if r, err = oidToConcatFunc[exec.multiAggInfo.argTypes[i].Oid](v, row, r); err != nil {
   107  			return err
   108  		}
   109  	}
   110  	if err = exec.ret.aggSet(r); err != nil {
   111  		return err
   112  	}
   113  	return nil
   114  }
   115  
   116  func (exec *groupConcatExec) BulkFill(groupIndex int, vectors []*vector.Vector) error {
   117  	exec.ret.groupToSet = groupIndex
   118  	for row, end := 0, vectors[0].Length(); row < end; row++ {
   119  		if err := exec.Fill(groupIndex, row, vectors); err != nil {
   120  			return err
   121  		}
   122  	}
   123  	return nil
   124  }
   125  
   126  func (exec *groupConcatExec) BatchFill(offset int, groups []uint64, vectors []*vector.Vector) error {
   127  	for i, j, idx := offset, offset+len(groups), 0; i < j; i++ {
   128  		if groups[idx] != GroupNotMatched {
   129  			if err := exec.Fill(int(groups[idx]-1), i, vectors); err != nil {
   130  				return err
   131  			}
   132  		}
   133  		idx++
   134  	}
   135  	return nil
   136  }
   137  
   138  func (exec *groupConcatExec) SetExtraInformation(partialResult any, groupIndex int) error {
   139  	// todo: too bad here.
   140  	exec.separator = partialResult.([]byte)
   141  	return nil
   142  }
   143  
   144  func (exec *groupConcatExec) merge(other *groupConcatExec, idx1, idx2 int) error {
   145  	exec.ret.groupToSet = idx1
   146  	other.ret.groupToSet = idx2
   147  	if err := exec.distinctHash.merge(&other.distinctHash); err != nil {
   148  		return err
   149  	}
   150  
   151  	v1 := exec.ret.aggGet()
   152  	v2 := other.ret.aggGet()
   153  	if len(v2) == 0 || len(v1) > groupConcatMaxLen {
   154  		return nil
   155  	}
   156  	if len(v1) > 0 && len(v2) > 0 {
   157  		v1 = append(v1, exec.separator...)
   158  		v1 = append(v1, v2...)
   159  		return exec.ret.aggSet(v1)
   160  	}
   161  	if len(v1) == 0 {
   162  		return exec.ret.aggSet(v2)
   163  	}
   164  	return exec.ret.aggSet(v1)
   165  }
   166  
   167  func (exec *groupConcatExec) Merge(next AggFuncExec, groupIdx1, groupIdx2 int) error {
   168  	return exec.merge(next.(*groupConcatExec), groupIdx1, groupIdx2)
   169  }
   170  
   171  func (exec *groupConcatExec) BatchMerge(next AggFuncExec, offset int, groups []uint64) error {
   172  	other := next.(*groupConcatExec)
   173  	for i := range groups {
   174  		if groups[i] == GroupNotMatched {
   175  			continue
   176  		}
   177  		if err := exec.merge(other, int(groups[i])-1, i+offset); err != nil {
   178  			return err
   179  		}
   180  	}
   181  	return nil
   182  }
   183  
   184  func (exec *groupConcatExec) Flush() (*vector.Vector, error) {
   185  	return exec.ret.flush(), nil
   186  }
   187  
// Free releases what the executor holds by calling free on the distinct-hash
// state and then on the result holder. The distinct-hash call is made whether
// or not the aggregation is DISTINCT.
func (exec *groupConcatExec) Free() {
	exec.distinctHash.free()
	exec.ret.free()
}
   192  
   193  var GroupConcatUnsupportedTypes = []types.T{
   194  	types.T_tuple,
   195  }
   196  
   197  func IsGroupConcatSupported(t types.Type) bool {
   198  	for _, unsupported := range GroupConcatUnsupportedTypes {
   199  		if t.Oid == unsupported {
   200  			return false
   201  		}
   202  	}
   203  	return true
   204  }
   205  
// oidToConcatFunc maps an argument type id to the function that formats one
// value of that type and appends it to a byte buffer. Each function takes the
// source vector, the row to read and the buffer, and returns the extended
// buffer. Fill looks the function up by the Oid of each argument type.
//
// NOTE(review): this table does not list every type id, while
// IsGroupConcatSupported only rejects the ids in GroupConcatUnsupportedTypes;
// a lookup for an id missing here yields a nil function.
var oidToConcatFunc = map[types.T]func(*vector.Vector, int, []byte) ([]byte, error){
	types.T_bit:           concatFixed[uint64],
	types.T_bool:          concatFixed[bool],
	types.T_int8:          concatFixed[int8],
	types.T_int16:         concatFixed[int16],
	types.T_int32:         concatFixed[int32],
	types.T_int64:         concatFixed[int64],
	types.T_uint8:         concatFixed[uint8],
	types.T_uint16:        concatFixed[uint16],
	types.T_uint32:        concatFixed[uint32],
	types.T_uint64:        concatFixed[uint64],
	types.T_float32:       concatFixed[float32],
	types.T_float64:       concatFixed[float64],
	types.T_decimal64:     concatDecimal64,
	types.T_decimal128:    concatDecimal128,
	types.T_date:          concatTime[types.Date],
	types.T_datetime:      concatTime[types.Datetime],
	types.T_timestamp:     concatTime[types.Timestamp],
	types.T_time:          concatTime[types.Time],
	types.T_varchar:       concatVar,
	types.T_char:          concatVar,
	types.T_blob:          concatVar,
	types.T_text:          concatVar,
	types.T_varbinary:     concatVar,
	types.T_binary:        concatVar,
	types.T_json:          concatVar,
	types.T_enum:          concatVar,
	types.T_interval:      concatFixed[types.IntervalType],
	types.T_TS:            concatFixed[types.TS],
	types.T_Rowid:         concatFixed[types.Rowid],
	types.T_Blockid:       concatFixed[types.Blockid],
	types.T_array_float32: concatVar,
	types.T_array_float64: concatVar,
}
   240  
   241  func concatFixed[T types.FixedSizeTExceptStrType](v *vector.Vector, row int, src []byte) ([]byte, error) {
   242  	value := vector.GetFixedAt[T](v, row)
   243  	return fmt.Appendf(src, "%v", value), nil
   244  }
   245  
   246  func concatVar(v *vector.Vector, row int, src []byte) ([]byte, error) {
   247  	value := v.GetBytesAt(row)
   248  
   249  	if err := isValidGroupConcatUnit(value); err != nil {
   250  		return nil, err
   251  	}
   252  	return append(src, value...), nil
   253  }
   254  
   255  func concatDecimal64(v *vector.Vector, row int, src []byte) ([]byte, error) {
   256  	value := vector.GetFixedAt[types.Decimal64](v, row)
   257  	return fmt.Appendf(src, "%v", value.Format(v.GetType().Scale)), nil
   258  }
   259  
   260  func concatDecimal128(v *vector.Vector, row int, src []byte) ([]byte, error) {
   261  	value := vector.GetFixedAt[types.Decimal128](v, row)
   262  	return fmt.Appendf(src, "%v", value.Format(v.GetType().Scale)), nil
   263  }
   264  
   265  func concatTime[T fmt.Stringer](v *vector.Vector, row int, src []byte) ([]byte, error) {
   266  	value := vector.GetFixedAt[T](v, row)
   267  	return fmt.Appendf(src, "%v", value.String()), nil
   268  }