github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/colexec/aggexec/approx_count.go (about) 1 // Copyright 2024 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package aggexec 16 17 import ( 18 hll "github.com/axiomhq/hyperloglog" 19 "github.com/matrixorigin/matrixone/pkg/container/types" 20 "github.com/matrixorigin/matrixone/pkg/container/vector" 21 ) 22 23 // approx_count() returns the approximate number of count(distinct) values in a group. 24 type approxCountFixedExec[T types.FixedSizeTExceptStrType] struct { 25 singleAggInfo 26 singleAggExecExtraInformation 27 arg sFixedArg[T] 28 ret aggFuncResult[uint64] 29 30 groups []*hll.Sketch 31 } 32 33 type approxCountVarExec struct { 34 singleAggInfo 35 singleAggExecExtraInformation 36 arg sBytesArg 37 ret aggFuncResult[uint64] 38 39 groups []*hll.Sketch 40 } 41 42 func newApproxCountFixedExec[T types.FixedSizeTExceptStrType](mg AggMemoryManager, info singleAggInfo) AggFuncExec { 43 return &approxCountFixedExec[T]{ 44 singleAggInfo: info, 45 ret: initFixedAggFuncResult[uint64](mg, info.retType, false), 46 } 47 } 48 49 func makeApproxCount(mg AggMemoryManager, id int64, arg types.Type) AggFuncExec { 50 info := singleAggInfo{ 51 aggID: id, 52 distinct: false, 53 argType: arg, 54 retType: types.T_uint64.ToType(), 55 emptyNull: false, 56 } 57 58 if info.argType.IsVarlen() { 59 return &approxCountVarExec{ 60 singleAggInfo: info, 61 ret: initFixedAggFuncResult[uint64](mg, info.retType, false), 62 } 63 } 64 65 switch info.argType.Oid { 66 case types.T_bool: 67 return newApproxCountFixedExec[bool](mg, info) 68 case types.T_bit, types.T_uint64: 69 return newApproxCountFixedExec[uint64](mg, info) 70 case types.T_int8: 71 return newApproxCountFixedExec[int8](mg, info) 72 case types.T_int16: 73 return newApproxCountFixedExec[int16](mg, info) 74 case types.T_int32: 75 return newApproxCountFixedExec[int32](mg, info) 76 case types.T_int64: 77 return newApproxCountFixedExec[int64](mg, info) 78 case types.T_uint8: 79 return newApproxCountFixedExec[uint8](mg, info) 80 case types.T_uint16: 81 return newApproxCountFixedExec[uint16](mg, info) 82 case types.T_uint32: 83 return newApproxCountFixedExec[uint32](mg, info) 84 case types.T_float32: 85 return newApproxCountFixedExec[float32](mg, info) 86 case types.T_float64: 87 return newApproxCountFixedExec[float64](mg, info) 88 case types.T_decimal64: 89 return newApproxCountFixedExec[types.Decimal64](mg, info) 90 case types.T_decimal128: 91 return newApproxCountFixedExec[types.Decimal128](mg, info) 92 case types.T_date: 93 return newApproxCountFixedExec[types.Date](mg, info) 94 case types.T_datetime: 95 return newApproxCountFixedExec[types.Datetime](mg, info) 96 case types.T_timestamp: 97 return newApproxCountFixedExec[types.Timestamp](mg, info) 98 case types.T_time: 99 return newApproxCountFixedExec[types.Time](mg, info) 100 case types.T_enum: 101 return newApproxCountFixedExec[types.Enum](mg, info) 102 case types.T_uuid: 103 return newApproxCountFixedExec[types.Uuid](mg, info) 104 default: 105 panic("unsupported type for approx_count()") 106 } 107 } 108 109 func (exec *approxCountFixedExec[T]) GroupGrow(more int) error { 110 oldLen, newLen := len(exec.groups), len(exec.groups)+more 111 exec.groups = append(exec.groups, make([]*hll.Sketch, more)...) 112 for i := oldLen; i < newLen; i++ { 113 exec.groups[i] = hll.New() 114 } 115 return exec.ret.grows(more) 116 } 117 118 func (exec *approxCountFixedExec[T]) PreAllocateGroups(more int) error { 119 return exec.ret.preAllocate(more) 120 } 121 122 func (exec *approxCountFixedExec[T]) Fill(groupIndex int, row int, vectors []*vector.Vector) error { 123 if vectors[0].IsNull(uint64(row)) { 124 return nil 125 } 126 if vectors[0].IsConst() { 127 row = 0 128 } 129 v := vector.MustFixedCol[T](vectors[0])[row] 130 exec.groups[groupIndex].Insert(types.EncodeFixed[T](v)) 131 return nil 132 } 133 134 func (exec *approxCountFixedExec[T]) BulkFill(groupIndex int, vectors []*vector.Vector) error { 135 if vectors[0].IsConstNull() { 136 return nil 137 } 138 if vectors[0].IsConst() { 139 v := vector.MustFixedCol[T](vectors[0])[0] 140 exec.groups[groupIndex].Insert(types.EncodeFixed[T](v)) 141 return nil 142 } 143 exec.arg.prepare(vectors[0]) 144 if exec.arg.w.WithAnyNullValue() { 145 for i, j := uint64(0), uint64(vectors[0].Length()); i < j; i++ { 146 if v, null := exec.arg.w.GetValue(i); !null { 147 exec.groups[groupIndex].Insert(types.EncodeFixed[T](v)) 148 } 149 } 150 return nil 151 } 152 153 for i, j := uint64(0), uint64(vectors[0].Length()); i < j; i++ { 154 v, _ := exec.arg.w.GetValue(i) 155 exec.groups[groupIndex].Insert(types.EncodeFixed[T](v)) 156 } 157 return nil 158 } 159 160 func (exec *approxCountFixedExec[T]) BatchFill(offset int, groups []uint64, vectors []*vector.Vector) error { 161 if vectors[0].IsConstNull() { 162 return nil 163 } 164 if vectors[0].IsConst() { 165 v := vector.MustFixedCol[T](vectors[0])[0] 166 for _, group := range groups { 167 if group != GroupNotMatched { 168 exec.groups[group-1].Insert(types.EncodeFixed[T](v)) 169 } 170 } 171 return nil 172 } 173 174 exec.arg.prepare(vectors[0]) 175 u64Offset := uint64(offset) 176 if exec.arg.w.WithAnyNullValue() { 177 for i, j := uint64(0), uint64(len(groups)); i < j; i++ { 178 if groups[i] != GroupNotMatched { 179 v, null := exec.arg.w.GetValue(i + u64Offset) 180 if !null { 181 exec.groups[groups[i]-1].Insert(types.EncodeFixed[T](v)) 182 } 183 } 184 } 185 return nil 186 } 187 188 for i, j := uint64(0), uint64(len(groups)); i < j; i++ { 189 if groups[i] != GroupNotMatched { 190 v, _ := exec.arg.w.GetValue(i + u64Offset) 191 exec.groups[groups[i]-1].Insert(types.EncodeFixed[T](v)) 192 } 193 } 194 return nil 195 } 196 197 func (exec *approxCountFixedExec[T]) Merge(next AggFuncExec, groupIdx1, groupIdx2 int) error { 198 nextExec := next.(*approxCountFixedExec[T]) 199 return exec.groups[groupIdx1].Merge(nextExec.groups[groupIdx2]) 200 } 201 202 func (exec *approxCountFixedExec[T]) BatchMerge(next AggFuncExec, offset int, groups []uint64) error { 203 other := next.(*approxCountFixedExec[T]) 204 205 for i := range groups { 206 if groups[i] == GroupNotMatched { 207 continue 208 } 209 g1, g2 := int(groups[i])-1, i+offset 210 if err := exec.groups[g1].Merge(other.groups[g2]); err != nil { 211 return err 212 } 213 } 214 return nil 215 } 216 217 func (exec *approxCountFixedExec[T]) Flush() (*vector.Vector, error) { 218 setter := exec.ret.aggSet 219 for i, group := range exec.groups { 220 exec.ret.groupToSet = i 221 setter(group.Estimate()) 222 } 223 224 if exec.partialResult != nil { 225 getter := exec.ret.aggGet 226 exec.ret.groupToSet = exec.partialGroup 227 setter(getter() + exec.partialResult.(uint64)) 228 } 229 return exec.ret.flush(), nil 230 } 231 232 func (exec *approxCountFixedExec[T]) Free() { 233 exec.ret.free() 234 exec.groups = nil 235 } 236 237 func (exec *approxCountVarExec) GroupGrow(more int) error { 238 oldLen, newLen := len(exec.groups), len(exec.groups)+more 239 if cap(exec.groups) >= newLen { 240 exec.groups = exec.groups[:newLen] 241 } else { 242 exec.groups = append(exec.groups, make([]*hll.Sketch, more)...) 243 } 244 245 for i := oldLen; i < newLen; i++ { 246 exec.groups[i] = hll.New() 247 } 248 return exec.ret.grows(more) 249 } 250 251 func (exec *approxCountVarExec) PreAllocateGroups(more int) error { 252 if len(exec.groups) == 0 { 253 exec.groups = make([]*hll.Sketch, 0, more) 254 } else { 255 oldLength := len(exec.groups) 256 exec.groups = append(exec.groups, make([]*hll.Sketch, more)...) 257 exec.groups = exec.groups[:oldLength] 258 } 259 260 return exec.ret.preAllocate(more) 261 } 262 263 func (exec *approxCountVarExec) Fill(groupIndex int, row int, vectors []*vector.Vector) error { 264 if vectors[0].IsNull(uint64(row)) { 265 return nil 266 } 267 if vectors[0].IsConst() { 268 row = 0 269 } 270 v := vector.MustBytesCol(vectors[0])[row] 271 exec.groups[groupIndex].Insert(v) 272 return nil 273 } 274 275 func (exec *approxCountVarExec) BulkFill(groupIndex int, vectors []*vector.Vector) error { 276 if vectors[0].IsConstNull() { 277 return nil 278 } 279 if vectors[0].IsConst() { 280 v := vector.MustBytesCol(vectors[0])[0] 281 exec.groups[groupIndex].Insert(v) 282 return nil 283 } 284 exec.arg.prepare(vectors[0]) 285 if exec.arg.w.WithAnyNullValue() { 286 for i, j := uint64(0), uint64(vectors[0].Length()); i < j; i++ { 287 if v, null := exec.arg.w.GetStrValue(i); !null { 288 exec.groups[groupIndex].Insert(v) 289 } 290 } 291 return nil 292 } 293 294 for i, j := uint64(0), uint64(vectors[0].Length()); i < j; i++ { 295 v, _ := exec.arg.w.GetStrValue(i) 296 exec.groups[groupIndex].Insert(v) 297 } 298 return nil 299 } 300 301 func (exec *approxCountVarExec) BatchFill(offset int, groups []uint64, vectors []*vector.Vector) error { 302 if vectors[0].IsConstNull() { 303 return nil 304 } 305 if vectors[0].IsConst() { 306 v := vector.MustBytesCol(vectors[0])[0] 307 for _, group := range groups { 308 if group != GroupNotMatched { 309 exec.groups[group-1].Insert(v) 310 } 311 } 312 return nil 313 } 314 315 exec.arg.prepare(vectors[0]) 316 u64Offset := uint64(offset) 317 if exec.arg.w.WithAnyNullValue() { 318 for i, j := uint64(0), uint64(len(groups)); i < j; i++ { 319 if groups[i] != GroupNotMatched { 320 v, null := exec.arg.w.GetStrValue(i + u64Offset) 321 if !null { 322 exec.groups[groups[i]-1].Insert(v) 323 } 324 } 325 } 326 return nil 327 } 328 329 for i, j := uint64(0), uint64(len(groups)); i < j; i++ { 330 if groups[i] != GroupNotMatched { 331 v, _ := exec.arg.w.GetStrValue(i + u64Offset) 332 exec.groups[groups[i]-1].Insert(v) 333 } 334 } 335 return nil 336 } 337 338 func (exec *approxCountVarExec) Merge(next AggFuncExec, groupIdx1, groupIdx2 int) error { 339 nextExec := next.(*approxCountVarExec) 340 return exec.groups[groupIdx1].Merge(nextExec.groups[groupIdx2]) 341 } 342 343 func (exec *approxCountVarExec) BatchMerge(next AggFuncExec, offset int, groups []uint64) error { 344 other := next.(*approxCountVarExec) 345 346 for i := range groups { 347 if groups[i] == GroupNotMatched { 348 continue 349 } 350 g1, g2 := int(groups[i])-1, i+offset 351 if err := exec.groups[g1].Merge(other.groups[g2]); err != nil { 352 return err 353 } 354 } 355 return nil 356 } 357 358 func (exec *approxCountVarExec) Flush() (*vector.Vector, error) { 359 setter := exec.ret.aggSet 360 for i, group := range exec.groups { 361 exec.ret.groupToSet = i 362 setter(group.Estimate()) 363 } 364 365 if exec.partialResult != nil { 366 getter := exec.ret.aggGet 367 exec.ret.groupToSet = exec.partialGroup 368 setter(getter() + exec.partialResult.(uint64)) 369 } 370 return exec.ret.flush(), nil 371 } 372 373 func (exec *approxCountVarExec) Free() { 374 exec.ret.free() 375 exec.groups = nil 376 }