github.com/whtcorpsinc/milevadb-prod@v0.0.0-20211104133533-f57f4be3b597/interlock/window.go (about)

     1  // Copyright 2020 WHTCORPS INC, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package interlock
    15  
    16  import (
    17  	"context"
    18  
    19  	"github.com/cznic/mathutil"
    20  	"github.com/whtcorpsinc/BerolinaSQL/ast"
    21  	"github.com/whtcorpsinc/errors"
    22  	"github.com/whtcorpsinc/milevadb/causet/embedded"
    23  	"github.com/whtcorpsinc/milevadb/interlock/aggfuncs"
    24  	"github.com/whtcorpsinc/milevadb/memex"
    25  	"github.com/whtcorpsinc/milevadb/soliton/chunk"
    26  	"github.com/whtcorpsinc/milevadb/stochastikctx"
    27  )
    28  
// WindowInterDirc is the interlock for window functions.
// It reads chunks from its single child, groups the rows into partitions with
// groupChecker, and lets processor append the window function results to the
// buffered result chunks.
type WindowInterDirc struct {
	baseInterlockingDirectorate

	// groupChecker splits the current child chunk into groups (partitions).
	groupChecker *vecGroupChecker
	// childResult stores the child chunk
	childResult *chunk.Chunk
	// executed indicates the child interlock is drained or something unexpected happened.
	executed bool
	// resultChunks stores the chunks to return
	resultChunks []*chunk.Chunk
	// remainingEventsInChunk[i] is the number of rows of resultChunks[i] whose
	// window function results have not been appended yet. A chunk is ready to
	// be returned once its counter drops to zero.
	remainingEventsInChunk []int

	// numWindowFuncs is the number of trailing schema columns that hold window
	// function results; the columns before them are referenced from the child chunk.
	numWindowFuncs int
	// processor computes the window function results for one partition.
	processor windowProcessor
}
    46  
    47  // Close implements the InterlockingDirectorate Close interface.
    48  func (e *WindowInterDirc) Close() error {
    49  	return errors.Trace(e.baseInterlockingDirectorate.Close())
    50  }
    51  
    52  // Next implements the InterlockingDirectorate Next interface.
    53  func (e *WindowInterDirc) Next(ctx context.Context, chk *chunk.Chunk) error {
    54  	chk.Reset()
    55  	for !e.executed && !e.preparedChunkAvailable() {
    56  		err := e.consumeOneGroup(ctx)
    57  		if err != nil {
    58  			e.executed = true
    59  			return err
    60  		}
    61  	}
    62  	if len(e.resultChunks) > 0 {
    63  		chk.SwapDeferredCausets(e.resultChunks[0])
    64  		e.resultChunks[0] = nil // GC it. TODO: Reuse it.
    65  		e.resultChunks = e.resultChunks[1:]
    66  		e.remainingEventsInChunk = e.remainingEventsInChunk[1:]
    67  	}
    68  	return nil
    69  }
    70  
    71  func (e *WindowInterDirc) preparedChunkAvailable() bool {
    72  	return len(e.resultChunks) > 0 && e.remainingEventsInChunk[0] == 0
    73  }
    74  
    75  func (e *WindowInterDirc) consumeOneGroup(ctx context.Context) error {
    76  	var groupEvents []chunk.Event
    77  	if e.groupChecker.isExhausted() {
    78  		eof, err := e.fetchChild(ctx)
    79  		if err != nil {
    80  			return errors.Trace(err)
    81  		}
    82  		if eof {
    83  			e.executed = true
    84  			return e.consumeGroupEvents(groupEvents)
    85  		}
    86  		_, err = e.groupChecker.splitIntoGroups(e.childResult)
    87  		if err != nil {
    88  			return errors.Trace(err)
    89  		}
    90  	}
    91  	begin, end := e.groupChecker.getNextGroup()
    92  	for i := begin; i < end; i++ {
    93  		groupEvents = append(groupEvents, e.childResult.GetEvent(i))
    94  	}
    95  
    96  	for meetLastGroup := end == e.childResult.NumEvents(); meetLastGroup; {
    97  		meetLastGroup = false
    98  		eof, err := e.fetchChild(ctx)
    99  		if err != nil {
   100  			return errors.Trace(err)
   101  		}
   102  		if eof {
   103  			e.executed = true
   104  			return e.consumeGroupEvents(groupEvents)
   105  		}
   106  
   107  		isFirstGroupSameAsPrev, err := e.groupChecker.splitIntoGroups(e.childResult)
   108  		if err != nil {
   109  			return errors.Trace(err)
   110  		}
   111  
   112  		if isFirstGroupSameAsPrev {
   113  			begin, end = e.groupChecker.getNextGroup()
   114  			for i := begin; i < end; i++ {
   115  				groupEvents = append(groupEvents, e.childResult.GetEvent(i))
   116  			}
   117  			meetLastGroup = end == e.childResult.NumEvents()
   118  		}
   119  	}
   120  	return e.consumeGroupEvents(groupEvents)
   121  }
   122  
   123  func (e *WindowInterDirc) consumeGroupEvents(groupEvents []chunk.Event) (err error) {
   124  	remainingEventsInGroup := len(groupEvents)
   125  	if remainingEventsInGroup == 0 {
   126  		return nil
   127  	}
   128  	for i := 0; i < len(e.resultChunks); i++ {
   129  		remained := mathutil.Min(e.remainingEventsInChunk[i], remainingEventsInGroup)
   130  		e.remainingEventsInChunk[i] -= remained
   131  		remainingEventsInGroup -= remained
   132  
   133  		// TODO: Combine these three methods.
   134  		// The old implementation needs the processor has these three methods
   135  		// but now it does not have to.
   136  		groupEvents, err = e.processor.consumeGroupEvents(e.ctx, groupEvents)
   137  		if err != nil {
   138  			return errors.Trace(err)
   139  		}
   140  		_, err = e.processor.appendResult2Chunk(e.ctx, groupEvents, e.resultChunks[i], remained)
   141  		if err != nil {
   142  			return errors.Trace(err)
   143  		}
   144  		if remainingEventsInGroup == 0 {
   145  			e.processor.resetPartialResult()
   146  			break
   147  		}
   148  	}
   149  	return nil
   150  }
   151  
   152  func (e *WindowInterDirc) fetchChild(ctx context.Context) (EOF bool, err error) {
   153  	childResult := newFirstChunk(e.children[0])
   154  	err = Next(ctx, e.children[0], childResult)
   155  	if err != nil {
   156  		return false, errors.Trace(err)
   157  	}
   158  	// No more data.
   159  	numEvents := childResult.NumEvents()
   160  	if numEvents == 0 {
   161  		return true, nil
   162  	}
   163  
   164  	resultChk := chunk.New(e.retFieldTypes, 0, numEvents)
   165  	err = e.copyChk(childResult, resultChk)
   166  	if err != nil {
   167  		return false, err
   168  	}
   169  	e.resultChunks = append(e.resultChunks, resultChk)
   170  	e.remainingEventsInChunk = append(e.remainingEventsInChunk, numEvents)
   171  
   172  	e.childResult = childResult
   173  	return false, nil
   174  }
   175  
   176  func (e *WindowInterDirc) copyChk(src, dst *chunk.Chunk) error {
   177  	defCausumns := e.Schema().DeferredCausets[:len(e.Schema().DeferredCausets)-e.numWindowFuncs]
   178  	for i, defCaus := range defCausumns {
   179  		if err := dst.MakeRefTo(i, src, defCaus.Index); err != nil {
   180  			return err
   181  		}
   182  	}
   183  	return nil
   184  }
   185  
// windowProcessor is the interface for processing different kinds of windows.
type windowProcessor interface {
	// consumeGroupEvents updates the result for a window function using the input rows
	// which belong to the same partition.
	// The returned slice is what the caller passes on to appendResult2Chunk.
	consumeGroupEvents(ctx stochastikctx.Context, rows []chunk.Event) ([]chunk.Event, error)
	// appendResult2Chunk appends the final results to chunk.
	// It is called when there are no more rows in current partition.
	// remained is the number of result rows to append to chk.
	appendResult2Chunk(ctx stochastikctx.Context, rows []chunk.Event, chk *chunk.Chunk, remained int) ([]chunk.Event, error)
	// resetPartialResult resets the partial result to the original state for a specific window function.
	resetPartialResult()
}
   197  
// aggWindowProcessor is a windowProcessor that feeds every row of a partition
// to each window function and emits the same final result for each output row.
type aggWindowProcessor struct {
	// windowFuncs are the functions to evaluate.
	windowFuncs []aggfuncs.AggFunc
	// partialResults[i] is the partial result belonging to windowFuncs[i].
	partialResults []aggfuncs.PartialResult
}
   202  
   203  func (p *aggWindowProcessor) consumeGroupEvents(ctx stochastikctx.Context, rows []chunk.Event) ([]chunk.Event, error) {
   204  	for i, windowFunc := range p.windowFuncs {
   205  		// @todo Add memory trace
   206  		_, err := windowFunc.UFIDelatePartialResult(ctx, rows, p.partialResults[i])
   207  		if err != nil {
   208  			return nil, err
   209  		}
   210  	}
   211  	rows = rows[:0]
   212  	return rows, nil
   213  }
   214  
   215  func (p *aggWindowProcessor) appendResult2Chunk(ctx stochastikctx.Context, rows []chunk.Event, chk *chunk.Chunk, remained int) ([]chunk.Event, error) {
   216  	for remained > 0 {
   217  		for i, windowFunc := range p.windowFuncs {
   218  			// TODO: We can extend the agg func interface to avoid the `for` loop  here.
   219  			err := windowFunc.AppendFinalResult2Chunk(ctx, p.partialResults[i], chk)
   220  			if err != nil {
   221  				return nil, err
   222  			}
   223  		}
   224  		remained--
   225  	}
   226  	return rows, nil
   227  }
   228  
   229  func (p *aggWindowProcessor) resetPartialResult() {
   230  	for i, windowFunc := range p.windowFuncs {
   231  		windowFunc.ResetPartialResult(p.partialResults[i])
   232  	}
   233  }
   234  
// rowFrameWindowProcessor is a windowProcessor for frames whose bounds are
// expressed as row offsets relative to the current row.
type rowFrameWindowProcessor struct {
	// windowFuncs are the functions to evaluate.
	windowFuncs []aggfuncs.AggFunc
	// partialResults[i] is the partial result belonging to windowFuncs[i].
	partialResults []aggfuncs.PartialResult
	// start and end are the frame bounds.
	start *embedded.FrameBound
	end   *embedded.FrameBound
	// curEventIdx is the index, within the partition, of the row whose result
	// is produced next. It is reset to 0 by resetPartialResult.
	curEventIdx uint64
}
   242  
   243  func (p *rowFrameWindowProcessor) getStartOffset(numEvents uint64) uint64 {
   244  	if p.start.UnBounded {
   245  		return 0
   246  	}
   247  	switch p.start.Type {
   248  	case ast.Preceding:
   249  		if p.curEventIdx >= p.start.Num {
   250  			return p.curEventIdx - p.start.Num
   251  		}
   252  		return 0
   253  	case ast.Following:
   254  		offset := p.curEventIdx + p.start.Num
   255  		if offset >= numEvents {
   256  			return numEvents
   257  		}
   258  		return offset
   259  	case ast.CurrentEvent:
   260  		return p.curEventIdx
   261  	}
   262  	// It will never reach here.
   263  	return 0
   264  }
   265  
   266  func (p *rowFrameWindowProcessor) getEndOffset(numEvents uint64) uint64 {
   267  	if p.end.UnBounded {
   268  		return numEvents
   269  	}
   270  	switch p.end.Type {
   271  	case ast.Preceding:
   272  		if p.curEventIdx >= p.end.Num {
   273  			return p.curEventIdx - p.end.Num + 1
   274  		}
   275  		return 0
   276  	case ast.Following:
   277  		offset := p.curEventIdx + p.end.Num
   278  		if offset >= numEvents {
   279  			return numEvents
   280  		}
   281  		return offset + 1
   282  	case ast.CurrentEvent:
   283  		return p.curEventIdx + 1
   284  	}
   285  	// It will never reach here.
   286  	return 0
   287  }
   288  
// consumeGroupEvents returns rows unchanged; the frame results are computed
// per output row in appendResult2Chunk.
func (p *rowFrameWindowProcessor) consumeGroupEvents(ctx stochastikctx.Context, rows []chunk.Event) ([]chunk.Event, error) {
	return rows, nil
}
   292  
   293  func (p *rowFrameWindowProcessor) appendResult2Chunk(ctx stochastikctx.Context, rows []chunk.Event, chk *chunk.Chunk, remained int) ([]chunk.Event, error) {
   294  	numEvents := uint64(len(rows))
   295  	var (
   296  		err                      error
   297  		initializedSlidingWindow bool
   298  		start                    uint64
   299  		end                      uint64
   300  		lastStart                uint64
   301  		lastEnd                  uint64
   302  		shiftStart               uint64
   303  		shiftEnd                 uint64
   304  	)
   305  	slidingWindowAggFuncs := make([]aggfuncs.SlidingWindowAggFunc, len(p.windowFuncs))
   306  	for i, windowFunc := range p.windowFuncs {
   307  		if slidingWindowAggFunc, ok := windowFunc.(aggfuncs.SlidingWindowAggFunc); ok {
   308  			slidingWindowAggFuncs[i] = slidingWindowAggFunc
   309  		}
   310  	}
   311  	for ; remained > 0; lastStart, lastEnd = start, end {
   312  		start = p.getStartOffset(numEvents)
   313  		end = p.getEndOffset(numEvents)
   314  		p.curEventIdx++
   315  		remained--
   316  		shiftStart = start - lastStart
   317  		shiftEnd = end - lastEnd
   318  		if start >= end {
   319  			for i, windowFunc := range p.windowFuncs {
   320  				slidingWindowAggFunc := slidingWindowAggFuncs[i]
   321  				if slidingWindowAggFunc != nil && initializedSlidingWindow {
   322  					err = slidingWindowAggFunc.Slide(ctx, rows, lastStart, lastEnd, shiftStart, shiftEnd, p.partialResults[i])
   323  					if err != nil {
   324  						return nil, err
   325  					}
   326  				}
   327  				err = windowFunc.AppendFinalResult2Chunk(ctx, p.partialResults[i], chk)
   328  				if err != nil {
   329  					return nil, err
   330  				}
   331  			}
   332  			continue
   333  		}
   334  
   335  		for i, windowFunc := range p.windowFuncs {
   336  			slidingWindowAggFunc := slidingWindowAggFuncs[i]
   337  			if slidingWindowAggFunc != nil && initializedSlidingWindow {
   338  				err = slidingWindowAggFunc.Slide(ctx, rows, lastStart, lastEnd, shiftStart, shiftEnd, p.partialResults[i])
   339  			} else {
   340  				_, err = windowFunc.UFIDelatePartialResult(ctx, rows[start:end], p.partialResults[i])
   341  			}
   342  			if err != nil {
   343  				return nil, err
   344  			}
   345  			err = windowFunc.AppendFinalResult2Chunk(ctx, p.partialResults[i], chk)
   346  			if err != nil {
   347  				return nil, err
   348  			}
   349  			if slidingWindowAggFunc == nil {
   350  				windowFunc.ResetPartialResult(p.partialResults[i])
   351  			}
   352  		}
   353  		if !initializedSlidingWindow {
   354  			initializedSlidingWindow = true
   355  		}
   356  	}
   357  	for i, windowFunc := range p.windowFuncs {
   358  		windowFunc.ResetPartialResult(p.partialResults[i])
   359  	}
   360  	return rows, nil
   361  }
   362  
// resetPartialResult rewinds the current row index to the start of a partition.
func (p *rowFrameWindowProcessor) resetPartialResult() {
	p.curEventIdx = 0
}
   366  
// rangeFrameWindowProcessor is a windowProcessor for frames whose bounds are
// found by comparing order-by values of rows against the current row.
type rangeFrameWindowProcessor struct {
	// windowFuncs are the functions to evaluate.
	windowFuncs []aggfuncs.AggFunc
	// partialResults[i] is the partial result belonging to windowFuncs[i].
	partialResults []aggfuncs.PartialResult
	// start and end are the frame bounds.
	start *embedded.FrameBound
	end   *embedded.FrameBound
	// curEventIdx is the index, within the partition, of the row whose result
	// is produced next.
	curEventIdx uint64
	// lastStartOffset and lastEndOffset remember where the previous frame
	// started and ended, so the search for the next frame resumes from there.
	lastStartOffset uint64
	lastEndOffset   uint64
	// orderByDefCauss are the columns compared when locating the frame bounds.
	orderByDefCauss []*memex.DeferredCauset
	// expectedCmpResult is used to decide if one value is included in the frame.
	expectedCmpResult int64
}
   379  
   380  func (p *rangeFrameWindowProcessor) getStartOffset(ctx stochastikctx.Context, rows []chunk.Event) (uint64, error) {
   381  	if p.start.UnBounded {
   382  		return 0, nil
   383  	}
   384  	numEvents := uint64(len(rows))
   385  	for ; p.lastStartOffset < numEvents; p.lastStartOffset++ {
   386  		var res int64
   387  		var err error
   388  		for i := range p.orderByDefCauss {
   389  			res, _, err = p.start.CmpFuncs[i](ctx, p.orderByDefCauss[i], p.start.CalcFuncs[i], rows[p.lastStartOffset], rows[p.curEventIdx])
   390  			if err != nil {
   391  				return 0, err
   392  			}
   393  			if res != 0 {
   394  				break
   395  			}
   396  		}
   397  		// For asc, break when the current value is greater or equal to the calculated result;
   398  		// For desc, break when the current value is less or equal to the calculated result.
   399  		if res != p.expectedCmpResult {
   400  			break
   401  		}
   402  	}
   403  	return p.lastStartOffset, nil
   404  }
   405  
   406  func (p *rangeFrameWindowProcessor) getEndOffset(ctx stochastikctx.Context, rows []chunk.Event) (uint64, error) {
   407  	numEvents := uint64(len(rows))
   408  	if p.end.UnBounded {
   409  		return numEvents, nil
   410  	}
   411  	for ; p.lastEndOffset < numEvents; p.lastEndOffset++ {
   412  		var res int64
   413  		var err error
   414  		for i := range p.orderByDefCauss {
   415  			res, _, err = p.end.CmpFuncs[i](ctx, p.end.CalcFuncs[i], p.orderByDefCauss[i], rows[p.curEventIdx], rows[p.lastEndOffset])
   416  			if err != nil {
   417  				return 0, err
   418  			}
   419  			if res != 0 {
   420  				break
   421  			}
   422  		}
   423  		// For asc, break when the calculated result is greater than the current value.
   424  		// For desc, break when the calculated result is less than the current value.
   425  		if res == p.expectedCmpResult {
   426  			break
   427  		}
   428  	}
   429  	return p.lastEndOffset, nil
   430  }
   431  
   432  func (p *rangeFrameWindowProcessor) appendResult2Chunk(ctx stochastikctx.Context, rows []chunk.Event, chk *chunk.Chunk, remained int) ([]chunk.Event, error) {
   433  	var (
   434  		err                      error
   435  		initializedSlidingWindow bool
   436  		start                    uint64
   437  		end                      uint64
   438  		lastStart                uint64
   439  		lastEnd                  uint64
   440  		shiftStart               uint64
   441  		shiftEnd                 uint64
   442  	)
   443  	slidingWindowAggFuncs := make([]aggfuncs.SlidingWindowAggFunc, len(p.windowFuncs))
   444  	for i, windowFunc := range p.windowFuncs {
   445  		if slidingWindowAggFunc, ok := windowFunc.(aggfuncs.SlidingWindowAggFunc); ok {
   446  			slidingWindowAggFuncs[i] = slidingWindowAggFunc
   447  		}
   448  	}
   449  	for ; remained > 0; lastStart, lastEnd = start, end {
   450  		start, err = p.getStartOffset(ctx, rows)
   451  		if err != nil {
   452  			return nil, err
   453  		}
   454  		end, err = p.getEndOffset(ctx, rows)
   455  		if err != nil {
   456  			return nil, err
   457  		}
   458  		p.curEventIdx++
   459  		remained--
   460  		shiftStart = start - lastStart
   461  		shiftEnd = end - lastEnd
   462  		if start >= end {
   463  			for i, windowFunc := range p.windowFuncs {
   464  				slidingWindowAggFunc := slidingWindowAggFuncs[i]
   465  				if slidingWindowAggFunc != nil && initializedSlidingWindow {
   466  					err = slidingWindowAggFunc.Slide(ctx, rows, lastStart, lastEnd, shiftStart, shiftEnd, p.partialResults[i])
   467  					if err != nil {
   468  						return nil, err
   469  					}
   470  				}
   471  				err = windowFunc.AppendFinalResult2Chunk(ctx, p.partialResults[i], chk)
   472  				if err != nil {
   473  					return nil, err
   474  				}
   475  			}
   476  			continue
   477  		}
   478  
   479  		for i, windowFunc := range p.windowFuncs {
   480  			slidingWindowAggFunc := slidingWindowAggFuncs[i]
   481  			if slidingWindowAggFunc != nil && initializedSlidingWindow {
   482  				err = slidingWindowAggFunc.Slide(ctx, rows, lastStart, lastEnd, shiftStart, shiftEnd, p.partialResults[i])
   483  			} else {
   484  				_, err = windowFunc.UFIDelatePartialResult(ctx, rows[start:end], p.partialResults[i])
   485  			}
   486  			if err != nil {
   487  				return nil, err
   488  			}
   489  			err = windowFunc.AppendFinalResult2Chunk(ctx, p.partialResults[i], chk)
   490  			if err != nil {
   491  				return nil, err
   492  			}
   493  			if slidingWindowAggFunc == nil {
   494  				windowFunc.ResetPartialResult(p.partialResults[i])
   495  			}
   496  		}
   497  		if !initializedSlidingWindow {
   498  			initializedSlidingWindow = true
   499  		}
   500  	}
   501  	for i, windowFunc := range p.windowFuncs {
   502  		windowFunc.ResetPartialResult(p.partialResults[i])
   503  	}
   504  	return rows, nil
   505  }
   506  
// consumeGroupEvents returns rows unchanged; the frame results are computed
// per output row in appendResult2Chunk.
func (p *rangeFrameWindowProcessor) consumeGroupEvents(ctx stochastikctx.Context, rows []chunk.Event) ([]chunk.Event, error) {
	return rows, nil
}
   510  
   511  func (p *rangeFrameWindowProcessor) resetPartialResult() {
   512  	p.curEventIdx = 0
   513  	p.lastStartOffset = 0
   514  	p.lastEndOffset = 0
   515  }