github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/interlock/aggfuncs/func_group_concat.go

// Copyright 2020 WHTCORPS INC, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package aggfuncs

import (
	"bytes"
	"container/heap"
	"sort"
	"sync/atomic"

	"github.com/whtcorpsinc/BerolinaSQL/terror"
	"github.com/whtcorpsinc/milevadb/causet/soliton"
	allegrosql "github.com/whtcorpsinc/milevadb/errno"
	"github.com/whtcorpsinc/milevadb/memex"
	"github.com/whtcorpsinc/milevadb/soliton/chunk"
	"github.com/whtcorpsinc/milevadb/soliton/codec"
	"github.com/whtcorpsinc/milevadb/soliton/replog"
	"github.com/whtcorpsinc/milevadb/soliton/set"
	"github.com/whtcorpsinc/milevadb/stochastikctx"
	"github.com/whtcorpsinc/milevadb/types"
)

type baseGroupConcat4String struct {
	baseAggFunc
	byItems []*soliton.ByItems

	sep    string
	maxLen uint64
	// According to MyALLEGROSQL, a 'group_concat' function generates exactly one 'truncated' warning during its
	// lifetime, no matter how many groups are actually truncated. 'truncated' acts as a sentinel to indicate
	// whether this warning has already been generated.
	truncated *int32
}

func (e *baseGroupConcat4String) AppendFinalResult2Chunk(sctx stochastikctx.Context, pr PartialResult, chk *chunk.Chunk) error {
	p := (*partialResult4GroupConcat)(pr)
	if p.buffer == nil {
		chk.AppendNull(e.ordinal)
		return nil
	}
	chk.AppendString(e.ordinal, p.buffer.String())
	return nil
}

func (e *baseGroupConcat4String) handleTruncateError(sctx stochastikctx.Context) (err error) {
	if atomic.CompareAndSwapInt32(e.truncated, 0, 1) {
		if !sctx.GetStochastikVars().StmtCtx.TruncateAsWarning {
			return memex.ErrCutValueGroupConcat.GenWithStackByArgs(e.args[0].String())
		}
		sctx.GetStochastikVars().StmtCtx.AppendWarning(memex.ErrCutValueGroupConcat.GenWithStackByArgs(e.args[0].String()))
	}
	return nil
}

func (e *baseGroupConcat4String) truncatePartialResultIfNeed(sctx stochastikctx.Context, buffer *bytes.Buffer) (err error) {
	if e.maxLen > 0 && uint64(buffer.Len()) > e.maxLen {
		buffer.Truncate(int(e.maxLen))
		return e.handleTruncateError(sctx)
	}
	return nil
}
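// The warning-once contract documented on baseGroupConcat4String can be exercised in
// isolation: the shared *int32 starts at 0, the first CompareAndSwap flips it to 1 and owns
// the single warning, and every later truncation stays silent. This is an illustrative
// sketch only (the function name below is not part of the aggregation API); it relies
// solely on sync/atomic, which is already imported above.
func exampleTruncatedSentinel() (firstWins, secondWins bool) {
	truncated := new(int32)
	// The first truncation observed by any partial worker wins the right to warn.
	firstWins = atomic.CompareAndSwapInt32(truncated, 0, 1)
	// Later truncations see the sentinel already set and do nothing.
	secondWins = atomic.CompareAndSwapInt32(truncated, 0, 1)
	return firstWins, secondWins // true, false
}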
type basePartialResult4GroupConcat struct {
	valsBuf *bytes.Buffer
	buffer  *bytes.Buffer
}

type partialResult4GroupConcat struct {
	basePartialResult4GroupConcat
}

type groupConcat struct {
	baseGroupConcat4String
}

func (e *groupConcat) AllocPartialResult() (pr PartialResult, memDelta int64) {
	p := new(partialResult4GroupConcat)
	p.valsBuf = &bytes.Buffer{}
	return PartialResult(p), 0
}

func (e *groupConcat) ResetPartialResult(pr PartialResult) {
	p := (*partialResult4GroupConcat)(pr)
	p.buffer = nil
}

func (e *groupConcat) UFIDelatePartialResult(sctx stochastikctx.Context, rowsInGroup []chunk.Event, pr PartialResult) (memDelta int64, err error) {
	p := (*partialResult4GroupConcat)(pr)
	v, isNull := "", false
	for _, event := range rowsInGroup {
		p.valsBuf.Reset()
		for _, arg := range e.args {
			v, isNull, err = arg.EvalString(sctx, event)
			if err != nil {
				return 0, err
			}
			if isNull {
				break
			}
			p.valsBuf.WriteString(v)
		}
		if isNull {
			continue
		}
		if p.buffer == nil {
			p.buffer = &bytes.Buffer{}
		} else {
			p.buffer.WriteString(e.sep)
		}
		p.buffer.WriteString(p.valsBuf.String())
	}
	if p.buffer != nil {
		return 0, e.truncatePartialResultIfNeed(sctx, p.buffer)
	}
	return 0, nil
}

func (e *groupConcat) MergePartialResult(sctx stochastikctx.Context, src, dst PartialResult) (memDelta int64, err error) {
	p1, p2 := (*partialResult4GroupConcat)(src), (*partialResult4GroupConcat)(dst)
	if p1.buffer == nil {
		return 0, nil
	}
	if p2.buffer == nil {
		p2.buffer = p1.buffer
		return 0, nil
	}
	p2.buffer.WriteString(e.sep)
	p2.buffer.WriteString(p1.buffer.String())
	return 0, e.truncatePartialResultIfNeed(sctx, p2.buffer)
}

// SetTruncated will be called in `interlockBuilder#buildHashAgg` via duck typing.
func (e *groupConcat) SetTruncated(t *int32) {
	e.truncated = t
}

// GetTruncated will be called in `interlockBuilder#buildHashAgg` via duck typing.
func (e *groupConcat) GetTruncated() *int32 {
	return e.truncated
}

type partialResult4GroupConcatDistinct struct {
	basePartialResult4GroupConcat
	valSet            set.StringSet
	encodeBytesBuffer []byte
}

type groupConcatDistinct struct {
	baseGroupConcat4String
}

func (e *groupConcatDistinct) AllocPartialResult() (pr PartialResult, memDelta int64) {
	p := new(partialResult4GroupConcatDistinct)
	p.valsBuf = &bytes.Buffer{}
	p.valSet = set.NewStringSet()
	return PartialResult(p), 0
}

func (e *groupConcatDistinct) ResetPartialResult(pr PartialResult) {
	p := (*partialResult4GroupConcatDistinct)(pr)
	p.buffer, p.valSet = nil, set.NewStringSet()
}

func (e *groupConcatDistinct) UFIDelatePartialResult(sctx stochastikctx.Context, rowsInGroup []chunk.Event, pr PartialResult) (memDelta int64, err error) {
	p := (*partialResult4GroupConcatDistinct)(pr)
	v, isNull := "", false
	for _, event := range rowsInGroup {
		p.valsBuf.Reset()
		p.encodeBytesBuffer = p.encodeBytesBuffer[:0]
		for _, arg := range e.args {
			v, isNull, err = arg.EvalString(sctx, event)
			if err != nil {
				return 0, err
			}
			if isNull {
				break
			}
			p.encodeBytesBuffer = codec.EncodeBytes(p.encodeBytesBuffer, replog.Slice(v))
			p.valsBuf.WriteString(v)
		}
		if isNull {
			continue
		}
		joinedVal := string(p.encodeBytesBuffer)
		if p.valSet.Exist(joinedVal) {
			continue
		}
		p.valSet.Insert(joinedVal)
		// write separator
		if p.buffer == nil {
			p.buffer = &bytes.Buffer{}
		} else {
			p.buffer.WriteString(e.sep)
		}
		// write values
		p.buffer.WriteString(p.valsBuf.String())
	}
	if p.buffer != nil {
		return 0, e.truncatePartialResultIfNeed(sctx, p.buffer)
	}
	return 0, nil
}

// SetTruncated will be called in `interlockBuilder#buildHashAgg` via duck typing.
func (e *groupConcatDistinct) SetTruncated(t *int32) {
	e.truncated = t
}

// GetTruncated will be called in `interlockBuilder#buildHashAgg` via duck typing.
func (e *groupConcatDistinct) GetTruncated() *int32 {
	return e.truncated
}
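// groupConcatDistinct keys its deduplication set on the codec.EncodeBytes encoding of each
// argument rather than on the raw concatenation of argument strings. The reason is that raw
// concatenation loses argument boundaries, so different argument tuples could collide. A
// minimal sketch of that collision (the helper below is illustrative only and is not used
// by the aggregation code):
func exampleRawKeyCollision() bool {
	// ("ab", "c") and ("a", "bc") are different argument tuples, but their raw
	// concatenations are identical, so a set keyed on them would wrongly deduplicate.
	key1 := "ab" + "c"
	key2 := "a" + "bc"
	return key1 == key2 // true: the raw key cannot tell the tuples apart
}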
// sortEvent is one candidate row for an ordered GROUP_CONCAT: the concatenated argument
// values plus the evaluated ORDER BY keys used to rank it.
type sortEvent struct {
	buffer  *bytes.Buffer
	byItems []*types.Causet
}

// topNEvents is a heap of sortEvents bounded by limitSize bytes. It keeps only the rows
// that can still appear in the final, possibly truncated, GROUP_CONCAT result.
type topNEvents struct {
	rows []sortEvent
	desc []bool
	sctx stochastikctx.Context
	err  error

	currSize  uint64
	limitSize uint64
	sepSize   uint64
}

func (h topNEvents) Len() int {
	return len(h.rows)
}

func (h topNEvents) Less(i, j int) bool {
	n := len(h.rows[i].byItems)
	for k := 0; k < n; k++ {
		ret, err := h.rows[i].byItems[k].CompareCauset(h.sctx.GetStochastikVars().StmtCtx, h.rows[j].byItems[k])
		if err != nil {
			h.err = err
			return false
		}
		if h.desc[k] {
			ret = -ret
		}
		if ret > 0 {
			return true
		}
		if ret < 0 {
			return false
		}
	}
	return false
}

func (h topNEvents) Swap(i, j int) {
	h.rows[i], h.rows[j] = h.rows[j], h.rows[i]
}

func (h *topNEvents) Push(x interface{}) {
	h.rows = append(h.rows, x.(sortEvent))
}

func (h *topNEvents) Pop() interface{} {
	n := len(h.rows)
	x := h.rows[n-1]
	h.rows = h.rows[:n-1]
	return x
}

// tryToAdd pushes the event onto the heap, then truncates or drops the heap-top row (the
// one that would appear last in the final ordering) until the accumulated size fits within
// limitSize. It reports whether any truncation happened.
func (h *topNEvents) tryToAdd(event sortEvent) (truncated bool) {
	h.currSize += uint64(event.buffer.Len())
	if len(h.rows) > 0 {
		h.currSize += h.sepSize
	}
	heap.Push(h, event)
	if h.currSize <= h.limitSize {
		return false
	}

	for h.currSize > h.limitSize {
		debt := h.currSize - h.limitSize
		if uint64(h.rows[0].buffer.Len()) > debt {
			h.currSize -= debt
			h.rows[0].buffer.Truncate(h.rows[0].buffer.Len() - int(debt))
		} else {
			h.currSize -= uint64(h.rows[0].buffer.Len()) + h.sepSize
			heap.Pop(h)
		}
	}
	return true
}

func (h *topNEvents) reset() {
	h.rows = h.rows[:0]
	h.err = nil
	h.currSize = 0
}

// concat joins the retained rows in ORDER BY order, separated by sep. If the result was
// truncated, it re-appends a trailing separator and cuts the string back to limitSize.
func (h *topNEvents) concat(sep string, truncated bool) string {
	buffer := new(bytes.Buffer)
	sort.Sort(sort.Reverse(h))
	for i, event := range h.rows {
		if i != 0 {
			buffer.WriteString(sep)
		}
		buffer.Write(event.buffer.Bytes())
	}
	if truncated && uint64(buffer.Len()) < h.limitSize {
		// Append the last separator, because it may have been truncated away in tryToAdd.
		buffer.WriteString(sep)
		buffer.Truncate(int(h.limitSize))
	}
	return buffer.String()
}
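// tryToAdd keeps the accumulated byte size within limitSize: it pushes the new row first,
// then either trims the heap-top row or pops whole rows until the size debt is repaid, and
// reports whether anything was truncated. The sketch below drives the heap directly with
// empty ORDER BY keys (so Less never consults the stochastik context); it is illustrative
// only and not part of the production flow.
func exampleTopNTruncation() bool {
	h := &topNEvents{limitSize: 5, sepSize: 1}
	// "abc" fits: currSize becomes 3, no truncation.
	first := h.tryToAdd(sortEvent{buffer: bytes.NewBufferString("abc")})
	// "defg" overflows: 3 + 1 (separator) + 4 = 8 > 5, so rows are trimmed or popped.
	second := h.tryToAdd(sortEvent{buffer: bytes.NewBufferString("defg")})
	return !first && second // true: only the second insertion triggered truncation
}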
type partialResult4GroupConcatOrder struct {
	topN *topNEvents
}

type groupConcatOrder struct {
	baseGroupConcat4String
}

func (e *groupConcatOrder) AppendFinalResult2Chunk(sctx stochastikctx.Context, pr PartialResult, chk *chunk.Chunk) error {
	p := (*partialResult4GroupConcatOrder)(pr)
	if p.topN.Len() == 0 {
		chk.AppendNull(e.ordinal)
		return nil
	}
	chk.AppendString(e.ordinal, p.topN.concat(e.sep, *e.truncated == 1))
	return nil
}

func (e *groupConcatOrder) AllocPartialResult() (pr PartialResult, memDelta int64) {
	desc := make([]bool, len(e.byItems))
	for i, byItem := range e.byItems {
		desc[i] = byItem.Desc
	}
	p := &partialResult4GroupConcatOrder{
		topN: &topNEvents{
			desc:      desc,
			currSize:  0,
			limitSize: e.maxLen,
			sepSize:   uint64(len(e.sep)),
		},
	}
	return PartialResult(p), 0
}

func (e *groupConcatOrder) ResetPartialResult(pr PartialResult) {
	p := (*partialResult4GroupConcatOrder)(pr)
	p.topN.reset()
}

func (e *groupConcatOrder) UFIDelatePartialResult(sctx stochastikctx.Context, rowsInGroup []chunk.Event, pr PartialResult) (memDelta int64, err error) {
	p := (*partialResult4GroupConcatOrder)(pr)
	p.topN.sctx = sctx
	v, isNull := "", false
	for _, event := range rowsInGroup {
		buffer := new(bytes.Buffer)
		for _, arg := range e.args {
			v, isNull, err = arg.EvalString(sctx, event)
			if err != nil {
				return 0, err
			}
			if isNull {
				break
			}
			buffer.WriteString(v)
		}
		if isNull {
			continue
		}
		sortEvent := sortEvent{
			buffer:  buffer,
			byItems: make([]*types.Causet, 0, len(e.byItems)),
		}
		for _, byItem := range e.byItems {
			d, err := byItem.Expr.Eval(event)
			if err != nil {
				return 0, err
			}
			sortEvent.byItems = append(sortEvent.byItems, d.Clone())
		}
		truncated := p.topN.tryToAdd(sortEvent)
		if p.topN.err != nil {
			return 0, p.topN.err
		}
		if truncated {
			if err := e.handleTruncateError(sctx); err != nil {
				return 0, err
			}
		}
	}
	return 0, nil
}

func (e *groupConcatOrder) MergePartialResult(sctx stochastikctx.Context, src, dst PartialResult) (memDelta int64, err error) {
	// If ORDER BY exists, parallel hash aggregation is forbidden in interlockBuilder.buildHashAgg,
	// so MergePartialResult will never be called.
	return 0, terror.ClassOptimizer.New(allegrosql.ErrInternal, allegrosql.MyALLEGROSQLErrName[allegrosql.ErrInternal]).GenWithStack("groupConcatOrder.MergePartialResult should not be called")
}

// SetTruncated will be called in `interlockBuilder#buildHashAgg` via duck typing.
func (e *groupConcatOrder) SetTruncated(t *int32) {
	e.truncated = t
}

// GetTruncated will be called in `interlockBuilder#buildHashAgg` via duck typing.
func (e *groupConcatOrder) GetTruncated() *int32 {
	return e.truncated
}
type partialResult4GroupConcatOrderDistinct struct {
	topN              *topNEvents
	valSet            set.StringSet
	encodeBytesBuffer []byte
}

type groupConcatDistinctOrder struct {
	baseGroupConcat4String
}

func (e *groupConcatDistinctOrder) AppendFinalResult2Chunk(sctx stochastikctx.Context, pr PartialResult, chk *chunk.Chunk) error {
	p := (*partialResult4GroupConcatOrderDistinct)(pr)
	if p.topN.Len() == 0 {
		chk.AppendNull(e.ordinal)
		return nil
	}
	chk.AppendString(e.ordinal, p.topN.concat(e.sep, *e.truncated == 1))
	return nil
}

func (e *groupConcatDistinctOrder) AllocPartialResult() (pr PartialResult, memDelta int64) {
	desc := make([]bool, len(e.byItems))
	for i, byItem := range e.byItems {
		desc[i] = byItem.Desc
	}
	p := &partialResult4GroupConcatOrderDistinct{
		topN: &topNEvents{
			desc:      desc,
			currSize:  0,
			limitSize: e.maxLen,
			sepSize:   uint64(len(e.sep)),
		},
		valSet: set.NewStringSet(),
	}
	return PartialResult(p), 0
}

func (e *groupConcatDistinctOrder) ResetPartialResult(pr PartialResult) {
	p := (*partialResult4GroupConcatOrderDistinct)(pr)
	p.topN.reset()
	p.valSet = set.NewStringSet()
}

func (e *groupConcatDistinctOrder) UFIDelatePartialResult(sctx stochastikctx.Context, rowsInGroup []chunk.Event, pr PartialResult) (memDelta int64, err error) {
	p := (*partialResult4GroupConcatOrderDistinct)(pr)
	p.topN.sctx = sctx
	v, isNull := "", false
	for _, event := range rowsInGroup {
		buffer := new(bytes.Buffer)
		p.encodeBytesBuffer = p.encodeBytesBuffer[:0]
		for _, arg := range e.args {
			v, isNull, err = arg.EvalString(sctx, event)
			if err != nil {
				return 0, err
			}
			if isNull {
				break
			}
			p.encodeBytesBuffer = codec.EncodeBytes(p.encodeBytesBuffer, replog.Slice(v))
			buffer.WriteString(v)
		}
		if isNull {
			continue
		}
		joinedVal := string(p.encodeBytesBuffer)
		if p.valSet.Exist(joinedVal) {
			continue
		}
		p.valSet.Insert(joinedVal)
		sortEvent := sortEvent{
			buffer:  buffer,
			byItems: make([]*types.Causet, 0, len(e.byItems)),
		}
		for _, byItem := range e.byItems {
			d, err := byItem.Expr.Eval(event)
			if err != nil {
				return 0, err
			}
			sortEvent.byItems = append(sortEvent.byItems, d.Clone())
		}
		truncated := p.topN.tryToAdd(sortEvent)
		if p.topN.err != nil {
			return 0, p.topN.err
		}
		if truncated {
			if err := e.handleTruncateError(sctx); err != nil {
				return 0, err
			}
		}
	}
	return 0, nil
}

func (e *groupConcatDistinctOrder) MergePartialResult(sctx stochastikctx.Context, src, dst PartialResult) (memDelta int64, err error) {
	// If ORDER BY exists, parallel hash aggregation is forbidden in interlockBuilder.buildHashAgg,
	// so MergePartialResult will never be called.
	return 0, terror.ClassOptimizer.New(allegrosql.ErrInternal, allegrosql.MyALLEGROSQLErrName[allegrosql.ErrInternal]).GenWithStack("groupConcatDistinctOrder.MergePartialResult should not be called")
}
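// The "duck typing" remark on SetTruncated/GetTruncated means the builder does not depend
// on the concrete groupConcat types: it can type-assert an aggregation function against a
// small interface and, when the assertion succeeds, share one truncation sentinel across
// partial and final functions so the single-warning guarantee holds end to end. The
// interface and wiring below are an illustrative sketch of that idea; the names
// truncatedSharer and exampleShareTruncatedFlag are assumptions, not the builder's code.
type truncatedSharer interface {
	SetTruncated(t *int32)
	GetTruncated() *int32
}

func exampleShareTruncatedFlag(partial, final interface{}) {
	src, okSrc := partial.(truncatedSharer)
	dst, okFinal := final.(truncatedSharer)
	if okSrc && okFinal {
		// Both sides now observe the same *int32 sentinel.
		dst.SetTruncated(src.GetTruncated())
	}
}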