github.com/whtcorpsinc/milevadb-prod@v0.0.0-20211104133533-f57f4be3b597/interlock/aggfuncs/func_group_concat.go (about)

     1  // Copyright 2020 WHTCORPS INC, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package aggfuncs
    15  
    16  import (
    17  	"bytes"
    18  	"container/heap"
    19  	"sort"
    20  	"sync/atomic"
    21  
    22  	"github.com/whtcorpsinc/BerolinaSQL/terror"
    23  	allegrosql "github.com/whtcorpsinc/milevadb/errno"
    24  	"github.com/whtcorpsinc/milevadb/memex"
    25  	"github.com/whtcorpsinc/milevadb/causet/soliton"
    26  	"github.com/whtcorpsinc/milevadb/stochastikctx"
    27  	"github.com/whtcorpsinc/milevadb/types"
    28  	"github.com/whtcorpsinc/milevadb/soliton/chunk"
    29  	"github.com/whtcorpsinc/milevadb/soliton/codec"
    30  	"github.com/whtcorpsinc/milevadb/soliton/replog"
    31  	"github.com/whtcorpsinc/milevadb/soliton/set"
    32  )
    33  
// baseGroupConcat4String carries the state shared by every group_concat
// variant in this file: the ORDER BY items, the separator written between
// concatenated rows, and the maximum result length.
type baseGroupConcat4String struct {
	baseAggFunc
	byItems []*soliton.ByItems

	sep    string
	maxLen uint64
	// According to MyALLEGROSQL, a 'group_concat' function generates exactly one 'truncated' warning during its lifetime, no matter
	// how many groups are actually truncated. 'truncated' acts as a sentinel to indicate whether this warning has already been
	// generated.
	truncated *int32
}
    45  
    46  func (e *baseGroupConcat4String) AppendFinalResult2Chunk(sctx stochastikctx.Context, pr PartialResult, chk *chunk.Chunk) error {
    47  	p := (*partialResult4GroupConcat)(pr)
    48  	if p.buffer == nil {
    49  		chk.AppendNull(e.ordinal)
    50  		return nil
    51  	}
    52  	chk.AppendString(e.ordinal, p.buffer.String())
    53  	return nil
    54  }
    55  
    56  func (e *baseGroupConcat4String) handleTruncateError(sctx stochastikctx.Context) (err error) {
    57  	if atomic.CompareAndSwapInt32(e.truncated, 0, 1) {
    58  		if !sctx.GetStochastikVars().StmtCtx.TruncateAsWarning {
    59  			return memex.ErrCutValueGroupConcat.GenWithStackByArgs(e.args[0].String())
    60  		}
    61  		sctx.GetStochastikVars().StmtCtx.AppendWarning(memex.ErrCutValueGroupConcat.GenWithStackByArgs(e.args[0].String()))
    62  	}
    63  	return nil
    64  }
    65  
    66  func (e *baseGroupConcat4String) truncatePartialResultIfNeed(sctx stochastikctx.Context, buffer *bytes.Buffer) (err error) {
    67  	if e.maxLen > 0 && uint64(buffer.Len()) > e.maxLen {
    68  		buffer.Truncate(int(e.maxLen))
    69  		return e.handleTruncateError(sctx)
    70  	}
    71  	return nil
    72  }
    73  
// basePartialResult4GroupConcat is the per-group state of the unordered
// group_concat variants.
type basePartialResult4GroupConcat struct {
	// valsBuf is scratch space holding the concatenated arguments of the row
	// currently being processed; it is reset for every row.
	valsBuf *bytes.Buffer
	// buffer is the accumulated result; it stays nil until the first row
	// without NULL arguments is appended.
	buffer  *bytes.Buffer
}
    78  
// partialResult4GroupConcat is the partial result used by groupConcat.
type partialResult4GroupConcat struct {
	basePartialResult4GroupConcat
}
    82  
// groupConcat is the group_concat variant that keeps every row in arrival
// order (no de-duplication, no sorting).
type groupConcat struct {
	baseGroupConcat4String
}
    86  
    87  func (e *groupConcat) AllocPartialResult() (pr PartialResult, memDelta int64) {
    88  	p := new(partialResult4GroupConcat)
    89  	p.valsBuf = &bytes.Buffer{}
    90  	return PartialResult(p), 0
    91  }
    92  
    93  func (e *groupConcat) ResetPartialResult(pr PartialResult) {
    94  	p := (*partialResult4GroupConcat)(pr)
    95  	p.buffer = nil
    96  }
    97  
    98  func (e *groupConcat) UFIDelatePartialResult(sctx stochastikctx.Context, rowsInGroup []chunk.Event, pr PartialResult) (memDelta int64, err error) {
    99  	p := (*partialResult4GroupConcat)(pr)
   100  	v, isNull := "", false
   101  	for _, event := range rowsInGroup {
   102  		p.valsBuf.Reset()
   103  		for _, arg := range e.args {
   104  			v, isNull, err = arg.EvalString(sctx, event)
   105  			if err != nil {
   106  				return 0, err
   107  			}
   108  			if isNull {
   109  				break
   110  			}
   111  			p.valsBuf.WriteString(v)
   112  		}
   113  		if isNull {
   114  			continue
   115  		}
   116  		if p.buffer == nil {
   117  			p.buffer = &bytes.Buffer{}
   118  		} else {
   119  			p.buffer.WriteString(e.sep)
   120  		}
   121  		p.buffer.WriteString(p.valsBuf.String())
   122  	}
   123  	if p.buffer != nil {
   124  		return 0, e.truncatePartialResultIfNeed(sctx, p.buffer)
   125  	}
   126  	return 0, nil
   127  }
   128  
   129  func (e *groupConcat) MergePartialResult(sctx stochastikctx.Context, src, dst PartialResult) (memDelta int64, err error) {
   130  	p1, p2 := (*partialResult4GroupConcat)(src), (*partialResult4GroupConcat)(dst)
   131  	if p1.buffer == nil {
   132  		return 0, nil
   133  	}
   134  	if p2.buffer == nil {
   135  		p2.buffer = p1.buffer
   136  		return 0, nil
   137  	}
   138  	p2.buffer.WriteString(e.sep)
   139  	p2.buffer.WriteString(p1.buffer.String())
   140  	return 0, e.truncatePartialResultIfNeed(sctx, p2.buffer)
   141  }
   142  
// SetTruncated will be called in `interlockBuilder#buildHashAgg` with duck-type.
// It installs t as the sentinel this function uses to emit the truncation
// warning only once.
func (e *groupConcat) SetTruncated(t *int32) {
	e.truncated = t
}
   147  
// GetTruncated will be called in `interlockBuilder#buildHashAgg` with duck-type.
// It returns the truncation sentinel currently installed (possibly nil).
func (e *groupConcat) GetTruncated() *int32 {
	return e.truncated
}
   152  
// partialResult4GroupConcatDistinct is the per-group state of
// groupConcatDistinct.
type partialResult4GroupConcatDistinct struct {
	basePartialResult4GroupConcat
	// valSet holds the encoded argument tuples already appended, used to
	// skip duplicates.
	valSet            set.StringSet
	// encodeBytesBuffer is reusable scratch space for building the encoded
	// key of the current row.
	encodeBytesBuffer []byte
}
   158  
// groupConcatDistinct is the group_concat variant that appends each distinct
// argument tuple only once, in arrival order.
type groupConcatDistinct struct {
	baseGroupConcat4String
}
   162  
   163  func (e *groupConcatDistinct) AllocPartialResult() (pr PartialResult, memDelta int64) {
   164  	p := new(partialResult4GroupConcatDistinct)
   165  	p.valsBuf = &bytes.Buffer{}
   166  	p.valSet = set.NewStringSet()
   167  	return PartialResult(p), 0
   168  }
   169  
   170  func (e *groupConcatDistinct) ResetPartialResult(pr PartialResult) {
   171  	p := (*partialResult4GroupConcatDistinct)(pr)
   172  	p.buffer, p.valSet = nil, set.NewStringSet()
   173  }
   174  
   175  func (e *groupConcatDistinct) UFIDelatePartialResult(sctx stochastikctx.Context, rowsInGroup []chunk.Event, pr PartialResult) (memDelta int64, err error) {
   176  	p := (*partialResult4GroupConcatDistinct)(pr)
   177  	v, isNull := "", false
   178  	for _, event := range rowsInGroup {
   179  		p.valsBuf.Reset()
   180  		p.encodeBytesBuffer = p.encodeBytesBuffer[:0]
   181  		for _, arg := range e.args {
   182  			v, isNull, err = arg.EvalString(sctx, event)
   183  			if err != nil {
   184  				return 0, err
   185  			}
   186  			if isNull {
   187  				break
   188  			}
   189  			p.encodeBytesBuffer = codec.EncodeBytes(p.encodeBytesBuffer, replog.Slice(v))
   190  			p.valsBuf.WriteString(v)
   191  		}
   192  		if isNull {
   193  			continue
   194  		}
   195  		joinedVal := string(p.encodeBytesBuffer)
   196  		if p.valSet.Exist(joinedVal) {
   197  			continue
   198  		}
   199  		p.valSet.Insert(joinedVal)
   200  		// write separator
   201  		if p.buffer == nil {
   202  			p.buffer = &bytes.Buffer{}
   203  		} else {
   204  			p.buffer.WriteString(e.sep)
   205  		}
   206  		// write values
   207  		p.buffer.WriteString(p.valsBuf.String())
   208  	}
   209  	if p.buffer != nil {
   210  		return 0, e.truncatePartialResultIfNeed(sctx, p.buffer)
   211  	}
   212  	return 0, nil
   213  }
   214  
// SetTruncated will be called in `interlockBuilder#buildHashAgg` with duck-type.
// It installs t as the sentinel this function uses to emit the truncation
// warning only once.
func (e *groupConcatDistinct) SetTruncated(t *int32) {
	e.truncated = t
}
   219  
// GetTruncated will be called in `interlockBuilder#buildHashAgg` with duck-type.
// It returns the truncation sentinel currently installed (possibly nil).
func (e *groupConcatDistinct) GetTruncated() *int32 {
	return e.truncated
}
   224  
// sortEvent is one candidate row of an ordered group_concat: the
// concatenated argument text plus the evaluated ORDER BY keys used to rank it.
type sortEvent struct {
	buffer  *bytes.Buffer
	byItems []*types.Causet
}
   229  
// topNEvents is a heap of rows for ordered group_concat that keeps the total
// output size within limitSize. It is used through container/heap and
// sort.Sort.
type topNEvents struct {
	// rows is the heap storage.
	rows []sortEvent
	// desc[k] tells whether the k-th ORDER BY key sorts descending.
	desc []bool
	// sctx supplies the statement context used when comparing keys.
	sctx stochastikctx.Context
	// err is meant to record a key-comparison failure raised inside Less.
	err  error

	// currSize is the running byte size of all rows plus the separators
	// between them.
	currSize  uint64
	// limitSize is the maximum allowed value of currSize.
	limitSize uint64
	// sepSize is the byte length of the separator.
	sepSize   uint64
}
   240  
// Len returns the number of rows currently held (sort.Interface).
func (h topNEvents) Len() int {
	return len(h.rows)
}
   244  
   245  func (h topNEvents) Less(i, j int) bool {
   246  	n := len(h.rows[i].byItems)
   247  	for k := 0; k < n; k++ {
   248  		ret, err := h.rows[i].byItems[k].CompareCauset(h.sctx.GetStochastikVars().StmtCtx, h.rows[j].byItems[k])
   249  		if err != nil {
   250  			h.err = err
   251  			return false
   252  		}
   253  		if h.desc[k] {
   254  			ret = -ret
   255  		}
   256  		if ret > 0 {
   257  			return true
   258  		}
   259  		if ret < 0 {
   260  			return false
   261  		}
   262  	}
   263  	return false
   264  }
   265  
// Swap exchanges rows i and j (sort.Interface). The value receiver is fine
// here because the copy shares the same backing array as the original.
func (h topNEvents) Swap(i, j int) {
	h.rows[i], h.rows[j] = h.rows[j], h.rows[i]
}
   269  
   270  func (h *topNEvents) Push(x interface{}) {
   271  	h.rows = append(h.rows, x.(sortEvent))
   272  }
   273  
   274  func (h *topNEvents) Pop() interface{} {
   275  	n := len(h.rows)
   276  	x := h.rows[n-1]
   277  	h.rows = h.rows[:n-1]
   278  	return x
   279  }
   280  
   281  func (h *topNEvents) tryToAdd(event sortEvent) (truncated bool) {
   282  	h.currSize += uint64(event.buffer.Len())
   283  	if len(h.rows) > 0 {
   284  		h.currSize += h.sepSize
   285  	}
   286  	heap.Push(h, event)
   287  	if h.currSize <= h.limitSize {
   288  		return false
   289  	}
   290  
   291  	for h.currSize > h.limitSize {
   292  		debt := h.currSize - h.limitSize
   293  		if uint64(h.rows[0].buffer.Len()) > debt {
   294  			h.currSize -= debt
   295  			h.rows[0].buffer.Truncate(h.rows[0].buffer.Len() - int(debt))
   296  		} else {
   297  			h.currSize -= uint64(h.rows[0].buffer.Len()) + h.sepSize
   298  			heap.Pop(h)
   299  		}
   300  	}
   301  	return true
   302  }
   303  
   304  func (h *topNEvents) reset() {
   305  	h.rows = h.rows[:0]
   306  	h.err = nil
   307  	h.currSize = 0
   308  }
   309  
   310  func (h *topNEvents) concat(sep string, truncated bool) string {
   311  	buffer := new(bytes.Buffer)
   312  	sort.Sort(sort.Reverse(h))
   313  	for i, event := range h.rows {
   314  		if i != 0 {
   315  			buffer.WriteString(sep)
   316  		}
   317  		buffer.Write(event.buffer.Bytes())
   318  	}
   319  	if truncated && uint64(buffer.Len()) < h.limitSize {
   320  		// append the last separator, because the last separator may be truncated in tryToAdd.
   321  		buffer.WriteString(sep)
   322  		buffer.Truncate(int(h.limitSize))
   323  	}
   324  	return buffer.String()
   325  }
   326  
// partialResult4GroupConcatOrder is the per-group state of groupConcatOrder:
// a size-bounded heap of the rows seen so far.
type partialResult4GroupConcatOrder struct {
	topN *topNEvents
}
   330  
// groupConcatOrder is the group_concat variant that emits rows sorted by the
// ORDER BY items.
type groupConcatOrder struct {
	baseGroupConcat4String
}
   334  
   335  func (e *groupConcatOrder) AppendFinalResult2Chunk(sctx stochastikctx.Context, pr PartialResult, chk *chunk.Chunk) error {
   336  	p := (*partialResult4GroupConcatOrder)(pr)
   337  	if p.topN.Len() == 0 {
   338  		chk.AppendNull(e.ordinal)
   339  		return nil
   340  	}
   341  	chk.AppendString(e.ordinal, p.topN.concat(e.sep, *e.truncated == 1))
   342  	return nil
   343  }
   344  
   345  func (e *groupConcatOrder) AllocPartialResult() (pr PartialResult, memDelta int64) {
   346  	desc := make([]bool, len(e.byItems))
   347  	for i, byItem := range e.byItems {
   348  		desc[i] = byItem.Desc
   349  	}
   350  	p := &partialResult4GroupConcatOrder{
   351  		topN: &topNEvents{
   352  			desc:      desc,
   353  			currSize:  0,
   354  			limitSize: e.maxLen,
   355  			sepSize:   uint64(len(e.sep)),
   356  		},
   357  	}
   358  	return PartialResult(p), 0
   359  }
   360  
   361  func (e *groupConcatOrder) ResetPartialResult(pr PartialResult) {
   362  	p := (*partialResult4GroupConcatOrder)(pr)
   363  	p.topN.reset()
   364  }
   365  
   366  func (e *groupConcatOrder) UFIDelatePartialResult(sctx stochastikctx.Context, rowsInGroup []chunk.Event, pr PartialResult) (memDelta int64, err error) {
   367  	p := (*partialResult4GroupConcatOrder)(pr)
   368  	p.topN.sctx = sctx
   369  	v, isNull := "", false
   370  	for _, event := range rowsInGroup {
   371  		buffer := new(bytes.Buffer)
   372  		for _, arg := range e.args {
   373  			v, isNull, err = arg.EvalString(sctx, event)
   374  			if err != nil {
   375  				return 0, err
   376  			}
   377  			if isNull {
   378  				break
   379  			}
   380  			buffer.WriteString(v)
   381  		}
   382  		if isNull {
   383  			continue
   384  		}
   385  		sortEvent := sortEvent{
   386  			buffer:  buffer,
   387  			byItems: make([]*types.Causet, 0, len(e.byItems)),
   388  		}
   389  		for _, byItem := range e.byItems {
   390  			d, err := byItem.Expr.Eval(event)
   391  			if err != nil {
   392  				return 0, err
   393  			}
   394  			sortEvent.byItems = append(sortEvent.byItems, d.Clone())
   395  		}
   396  		truncated := p.topN.tryToAdd(sortEvent)
   397  		if p.topN.err != nil {
   398  			return 0, p.topN.err
   399  		}
   400  		if truncated {
   401  			if err := e.handleTruncateError(sctx); err != nil {
   402  				return 0, err
   403  			}
   404  		}
   405  	}
   406  	return 0, nil
   407  }
   408  
   409  func (e *groupConcatOrder) MergePartialResult(sctx stochastikctx.Context, src, dst PartialResult) (memDelta int64, err error) {
   410  	// If order by exists, the parallel hash aggregation is forbidden in interlockBuilder.buildHashAgg.
   411  	// So MergePartialResult will not be called.
   412  	return 0, terror.ClassOptimizer.New(allegrosql.ErrInternal, allegrosql.MyALLEGROSQLErrName[allegrosql.ErrInternal]).GenWithStack("groupConcatOrder.MergePartialResult should not be called")
   413  }
   414  
// SetTruncated will be called in `interlockBuilder#buildHashAgg` with duck-type.
// It installs t as the sentinel this function uses to emit the truncation
// warning only once.
func (e *groupConcatOrder) SetTruncated(t *int32) {
	e.truncated = t
}
   419  
// GetTruncated will be called in `interlockBuilder#buildHashAgg` with duck-type.
// It returns the truncation sentinel currently installed (possibly nil).
func (e *groupConcatOrder) GetTruncated() *int32 {
	return e.truncated
}
   424  
// partialResult4GroupConcatOrderDistinct is the per-group state of
// groupConcatDistinctOrder.
type partialResult4GroupConcatOrderDistinct struct {
	// topN is the size-bounded heap of the distinct rows seen so far.
	topN              *topNEvents
	// valSet holds the encoded argument tuples already offered to topN.
	valSet            set.StringSet
	// encodeBytesBuffer is reusable scratch space for building the encoded
	// key of the current row.
	encodeBytesBuffer []byte
}
   430  
// groupConcatDistinctOrder is the group_concat variant that keeps each
// distinct argument tuple once and emits the rows sorted by the ORDER BY items.
type groupConcatDistinctOrder struct {
	baseGroupConcat4String
}
   434  
   435  func (e *groupConcatDistinctOrder) AppendFinalResult2Chunk(sctx stochastikctx.Context, pr PartialResult, chk *chunk.Chunk) error {
   436  	p := (*partialResult4GroupConcatOrderDistinct)(pr)
   437  	if p.topN.Len() == 0 {
   438  		chk.AppendNull(e.ordinal)
   439  		return nil
   440  	}
   441  	chk.AppendString(e.ordinal, p.topN.concat(e.sep, *e.truncated == 1))
   442  	return nil
   443  }
   444  
   445  func (e *groupConcatDistinctOrder) AllocPartialResult() (pr PartialResult, memDelta int64) {
   446  	desc := make([]bool, len(e.byItems))
   447  	for i, byItem := range e.byItems {
   448  		desc[i] = byItem.Desc
   449  	}
   450  	p := &partialResult4GroupConcatOrderDistinct{
   451  		topN: &topNEvents{
   452  			desc:      desc,
   453  			currSize:  0,
   454  			limitSize: e.maxLen,
   455  			sepSize:   uint64(len(e.sep)),
   456  		},
   457  		valSet: set.NewStringSet(),
   458  	}
   459  	return PartialResult(p), 0
   460  }
   461  
   462  func (e *groupConcatDistinctOrder) ResetPartialResult(pr PartialResult) {
   463  	p := (*partialResult4GroupConcatOrderDistinct)(pr)
   464  	p.topN.reset()
   465  	p.valSet = set.NewStringSet()
   466  }
   467  
   468  func (e *groupConcatDistinctOrder) UFIDelatePartialResult(sctx stochastikctx.Context, rowsInGroup []chunk.Event, pr PartialResult) (memDelta int64, err error) {
   469  	p := (*partialResult4GroupConcatOrderDistinct)(pr)
   470  	p.topN.sctx = sctx
   471  	v, isNull := "", false
   472  	for _, event := range rowsInGroup {
   473  		buffer := new(bytes.Buffer)
   474  		p.encodeBytesBuffer = p.encodeBytesBuffer[:0]
   475  		for _, arg := range e.args {
   476  			v, isNull, err = arg.EvalString(sctx, event)
   477  			if err != nil {
   478  				return 0, err
   479  			}
   480  			if isNull {
   481  				break
   482  			}
   483  			p.encodeBytesBuffer = codec.EncodeBytes(p.encodeBytesBuffer, replog.Slice(v))
   484  			buffer.WriteString(v)
   485  		}
   486  		if isNull {
   487  			continue
   488  		}
   489  		joinedVal := string(p.encodeBytesBuffer)
   490  		if p.valSet.Exist(joinedVal) {
   491  			continue
   492  		}
   493  		p.valSet.Insert(joinedVal)
   494  		sortEvent := sortEvent{
   495  			buffer:  buffer,
   496  			byItems: make([]*types.Causet, 0, len(e.byItems)),
   497  		}
   498  		for _, byItem := range e.byItems {
   499  			d, err := byItem.Expr.Eval(event)
   500  			if err != nil {
   501  				return 0, err
   502  			}
   503  			sortEvent.byItems = append(sortEvent.byItems, d.Clone())
   504  		}
   505  		truncated := p.topN.tryToAdd(sortEvent)
   506  		if p.topN.err != nil {
   507  			return 0, p.topN.err
   508  		}
   509  		if truncated {
   510  			if err := e.handleTruncateError(sctx); err != nil {
   511  				return 0, err
   512  			}
   513  		}
   514  	}
   515  	return 0, nil
   516  }
   517  
   518  func (e *groupConcatDistinctOrder) MergePartialResult(sctx stochastikctx.Context, src, dst PartialResult) (memDelta int64, err error) {
   519  	// If order by exists, the parallel hash aggregation is forbidden in interlockBuilder.buildHashAgg.
   520  	// So MergePartialResult will not be called.
   521  	return 0, terror.ClassOptimizer.New(allegrosql.ErrInternal, allegrosql.MyALLEGROSQLErrName[allegrosql.ErrInternal]).GenWithStack("groupConcatDistinctOrder.MergePartialResult should not be called")
   522  }