github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/colexec/aggexec/concat.go (about) 1 // Copyright 2024 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package aggexec 16 17 import ( 18 "fmt" 19 "github.com/matrixorigin/matrixone/pkg/common/moerr" 20 "github.com/matrixorigin/matrixone/pkg/container/types" 21 "github.com/matrixorigin/matrixone/pkg/container/vector" 22 "math" 23 ) 24 25 const ( 26 groupConcatMaxLen = 1024 27 ) 28 29 // group_concat is a special string aggregation function. 30 type groupConcatExec struct { 31 multiAggInfo 32 ret aggFuncBytesResult 33 distinctHash 34 35 separator []byte 36 } 37 38 func GroupConcatReturnType(args []types.Type) types.Type { 39 for _, p := range args { 40 if p.Oid == types.T_binary || p.Oid == types.T_varbinary || p.Oid == types.T_blob { 41 return types.T_blob.ToType() 42 } 43 } 44 return types.T_text.ToType() 45 } 46 47 func newGroupConcatExec(mg AggMemoryManager, info multiAggInfo, separator string) AggFuncExec { 48 exec := &groupConcatExec{ 49 multiAggInfo: info, 50 ret: initBytesAggFuncResult(mg, info.retType, info.emptyNull), 51 separator: []byte(separator), 52 } 53 if info.distinct { 54 exec.distinctHash = newDistinctHash(mg.Mp(), false) 55 } 56 return exec 57 } 58 59 func isValidGroupConcatUnit(value []byte) error { 60 if len(value) > math.MaxUint16 { 61 return moerr.NewInternalErrorNoCtx("group_concat: the length of the value is too long") 62 } 63 return nil 64 } 65 66 func (exec *groupConcatExec) GroupGrow(more int) error { 67 if exec.IsDistinct() { 68 if err := exec.distinctHash.grows(more); err != nil { 69 return err 70 } 71 } 72 return exec.ret.grows(more) 73 } 74 75 func (exec *groupConcatExec) PreAllocateGroups(more int) error { 76 return exec.ret.preAllocate(more) 77 } 78 79 func (exec *groupConcatExec) Fill(groupIndex int, row int, vectors []*vector.Vector) error { 80 // if any value was null, there is no need to Fill. 81 u64Row := uint64(row) 82 for _, v := range vectors { 83 if v.IsNull(u64Row) { 84 return nil 85 } 86 } 87 88 if exec.IsDistinct() { 89 if need, err := exec.distinctHash.fill(groupIndex, vectors, row); err != nil || !need { 90 return err 91 } 92 } 93 94 exec.ret.groupToSet = groupIndex 95 exec.ret.setGroupNotEmpty(groupIndex) 96 r := exec.ret.aggGet() 97 if len(r) > groupConcatMaxLen { 98 return nil 99 } 100 if len(r) > 0 { 101 r = append(r, exec.separator...) 102 } 103 104 var err error 105 for i, v := range vectors { 106 if r, err = oidToConcatFunc[exec.multiAggInfo.argTypes[i].Oid](v, row, r); err != nil { 107 return err 108 } 109 } 110 if err = exec.ret.aggSet(r); err != nil { 111 return err 112 } 113 return nil 114 } 115 116 func (exec *groupConcatExec) BulkFill(groupIndex int, vectors []*vector.Vector) error { 117 exec.ret.groupToSet = groupIndex 118 for row, end := 0, vectors[0].Length(); row < end; row++ { 119 if err := exec.Fill(groupIndex, row, vectors); err != nil { 120 return err 121 } 122 } 123 return nil 124 } 125 126 func (exec *groupConcatExec) BatchFill(offset int, groups []uint64, vectors []*vector.Vector) error { 127 for i, j, idx := offset, offset+len(groups), 0; i < j; i++ { 128 if groups[idx] != GroupNotMatched { 129 if err := exec.Fill(int(groups[idx]-1), i, vectors); err != nil { 130 return err 131 } 132 } 133 idx++ 134 } 135 return nil 136 } 137 138 func (exec *groupConcatExec) SetExtraInformation(partialResult any, groupIndex int) error { 139 // todo: too bad here. 140 exec.separator = partialResult.([]byte) 141 return nil 142 } 143 144 func (exec *groupConcatExec) merge(other *groupConcatExec, idx1, idx2 int) error { 145 exec.ret.groupToSet = idx1 146 other.ret.groupToSet = idx2 147 if err := exec.distinctHash.merge(&other.distinctHash); err != nil { 148 return err 149 } 150 151 v1 := exec.ret.aggGet() 152 v2 := other.ret.aggGet() 153 if len(v2) == 0 || len(v1) > groupConcatMaxLen { 154 return nil 155 } 156 if len(v1) > 0 && len(v2) > 0 { 157 v1 = append(v1, exec.separator...) 158 v1 = append(v1, v2...) 159 return exec.ret.aggSet(v1) 160 } 161 if len(v1) == 0 { 162 return exec.ret.aggSet(v2) 163 } 164 return exec.ret.aggSet(v1) 165 } 166 167 func (exec *groupConcatExec) Merge(next AggFuncExec, groupIdx1, groupIdx2 int) error { 168 return exec.merge(next.(*groupConcatExec), groupIdx1, groupIdx2) 169 } 170 171 func (exec *groupConcatExec) BatchMerge(next AggFuncExec, offset int, groups []uint64) error { 172 other := next.(*groupConcatExec) 173 for i := range groups { 174 if groups[i] == GroupNotMatched { 175 continue 176 } 177 if err := exec.merge(other, int(groups[i])-1, i+offset); err != nil { 178 return err 179 } 180 } 181 return nil 182 } 183 184 func (exec *groupConcatExec) Flush() (*vector.Vector, error) { 185 return exec.ret.flush(), nil 186 } 187 188 func (exec *groupConcatExec) Free() { 189 exec.distinctHash.free() 190 exec.ret.free() 191 } 192 193 var GroupConcatUnsupportedTypes = []types.T{ 194 types.T_tuple, 195 } 196 197 func IsGroupConcatSupported(t types.Type) bool { 198 for _, unsupported := range GroupConcatUnsupportedTypes { 199 if t.Oid == unsupported { 200 return false 201 } 202 } 203 return true 204 } 205 206 var oidToConcatFunc = map[types.T]func(*vector.Vector, int, []byte) ([]byte, error){ 207 types.T_bit: concatFixed[uint64], 208 types.T_bool: concatFixed[bool], 209 types.T_int8: concatFixed[int8], 210 types.T_int16: concatFixed[int16], 211 types.T_int32: concatFixed[int32], 212 types.T_int64: concatFixed[int64], 213 types.T_uint8: concatFixed[uint8], 214 types.T_uint16: concatFixed[uint16], 215 types.T_uint32: concatFixed[uint32], 216 types.T_uint64: concatFixed[uint64], 217 types.T_float32: concatFixed[float32], 218 types.T_float64: concatFixed[float64], 219 types.T_decimal64: concatDecimal64, 220 types.T_decimal128: concatDecimal128, 221 types.T_date: concatTime[types.Date], 222 types.T_datetime: concatTime[types.Datetime], 223 types.T_timestamp: concatTime[types.Timestamp], 224 types.T_time: concatTime[types.Time], 225 types.T_varchar: concatVar, 226 types.T_char: concatVar, 227 types.T_blob: concatVar, 228 types.T_text: concatVar, 229 types.T_varbinary: concatVar, 230 types.T_binary: concatVar, 231 types.T_json: concatVar, 232 types.T_enum: concatVar, 233 types.T_interval: concatFixed[types.IntervalType], 234 types.T_TS: concatFixed[types.TS], 235 types.T_Rowid: concatFixed[types.Rowid], 236 types.T_Blockid: concatFixed[types.Blockid], 237 types.T_array_float32: concatVar, 238 types.T_array_float64: concatVar, 239 } 240 241 func concatFixed[T types.FixedSizeTExceptStrType](v *vector.Vector, row int, src []byte) ([]byte, error) { 242 value := vector.GetFixedAt[T](v, row) 243 return fmt.Appendf(src, "%v", value), nil 244 } 245 246 func concatVar(v *vector.Vector, row int, src []byte) ([]byte, error) { 247 value := v.GetBytesAt(row) 248 249 if err := isValidGroupConcatUnit(value); err != nil { 250 return nil, err 251 } 252 return append(src, value...), nil 253 } 254 255 func concatDecimal64(v *vector.Vector, row int, src []byte) ([]byte, error) { 256 value := vector.GetFixedAt[types.Decimal64](v, row) 257 return fmt.Appendf(src, "%v", value.Format(v.GetType().Scale)), nil 258 } 259 260 func concatDecimal128(v *vector.Vector, row int, src []byte) ([]byte, error) { 261 value := vector.GetFixedAt[types.Decimal128](v, row) 262 return fmt.Appendf(src, "%v", value.Format(v.GetType().Scale)), nil 263 } 264 265 func concatTime[T fmt.Stringer](v *vector.Vector, row int, src []byte) ([]byte, error) { 266 value := vector.GetFixedAt[T](v, row) 267 return fmt.Appendf(src, "%v", value.String()), nil 268 }