github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/sem/builtins/window_frame_builtins.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package builtins
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"strings"
    17  
    18  	"github.com/cockroachdb/apd"
    19  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    20  	"github.com/cockroachdb/cockroach/pkg/util/duration"
    21  	"github.com/cockroachdb/cockroach/pkg/util/ring"
    22  	"github.com/cockroachdb/errors"
    23  )
    24  
// indexedValue combines a value from the row with the index of that row.
// value holds the datum and idx holds the index of the row it was read from;
// the index is what lets slidingWindow.removeAllBefore decide whether an entry
// is still inside the frame.
type indexedValue struct {
	value tree.Datum
	idx   int
}
    30  
// slidingWindow maintains a deque of values along with corresponding indices
// based on cmp function:
// for Min behavior, cmp = -a.Compare(b)
// for Max behavior, cmp = a.Compare(b)
//
// It assumes that the frame bounds will never go back, i.e. non-decreasing
// sequences of frame start and frame end indices.
//
// values stores *indexedValue entries, evalCtx is the evaluation context that
// is handed to cmp on every comparison, and cmp returns a positive number when
// its first datum has strictly higher priority than its second one.
type slidingWindow struct {
	values  ring.Buffer
	evalCtx *tree.EvalContext
	cmp     func(*tree.EvalContext, tree.Datum, tree.Datum) int
}
    43  
    44  func makeSlidingWindow(
    45  	evalCtx *tree.EvalContext, cmp func(*tree.EvalContext, tree.Datum, tree.Datum) int,
    46  ) *slidingWindow {
    47  	return &slidingWindow{
    48  		evalCtx: evalCtx,
    49  		cmp:     cmp,
    50  	}
    51  }
    52  
    53  // add first removes all values that are "smaller or equal" (depending on cmp)
    54  // from the end of the deque and then appends 'iv' to the end. This way, the
    55  // deque always contains unique values sorted in descending order of their
    56  // "priority" (when we encounter duplicates, we always keep the one with the
    57  // largest idx).
    58  func (sw *slidingWindow) add(iv *indexedValue) {
    59  	for i := sw.values.Len() - 1; i >= 0; i-- {
    60  		if sw.cmp(sw.evalCtx, sw.values.Get(i).(*indexedValue).value, iv.value) > 0 {
    61  			break
    62  		}
    63  		sw.values.RemoveLast()
    64  	}
    65  	sw.values.AddLast(iv)
    66  }
    67  
    68  // removeAllBefore removes all values from the beginning of the deque that have
    69  // indices smaller than given 'idx'. This operation corresponds to shifting the
    70  // start of the frame up to 'idx'.
    71  func (sw *slidingWindow) removeAllBefore(idx int) {
    72  	for i := 0; i < sw.values.Len() && i < idx; i++ {
    73  		if sw.values.Get(i).(*indexedValue).idx >= idx {
    74  			break
    75  		}
    76  		sw.values.RemoveFirst()
    77  	}
    78  }
    79  
    80  func (sw *slidingWindow) string() string {
    81  	var builder strings.Builder
    82  	for i := 0; i < sw.values.Len(); i++ {
    83  		builder.WriteString(fmt.Sprintf("(%v, %v)\t", sw.values.Get(i).(*indexedValue).value, sw.values.Get(i).(*indexedValue).idx))
    84  	}
    85  	return builder.String()
    86  }
    87  
// reset resets the underlying deque of values by delegating to its Reset
// method. evalCtx and cmp are left untouched.
func (sw *slidingWindow) reset() {
	sw.values.Reset()
}
    91  
// slidingWindowFunc computes a window function result by keeping a
// slidingWindow up to date as the frame moves. sw is the deque of candidate
// values and prevEnd is the frame end index recorded by the previous call to
// Compute, which is used to avoid adding the same row to sw twice.
type slidingWindowFunc struct {
	sw      *slidingWindow
	prevEnd int
}
    96  
    97  // Compute implements WindowFunc interface.
    98  func (w *slidingWindowFunc) Compute(
    99  	ctx context.Context, evalCtx *tree.EvalContext, wfr *tree.WindowFrameRun,
   100  ) (tree.Datum, error) {
   101  	frameStartIdx, err := wfr.FrameStartIdx(ctx, evalCtx)
   102  	if err != nil {
   103  		return nil, err
   104  	}
   105  	frameEndIdx, err := wfr.FrameEndIdx(ctx, evalCtx)
   106  	if err != nil {
   107  		return nil, err
   108  	}
   109  
   110  	if !wfr.Frame.DefaultFrameExclusion() {
   111  		// We cannot use a sliding window approach because we have a frame
   112  		// exclusion clause - some rows will be in and out of the frame which
   113  		// breaks the necessary assumption, so we fallback to a naive quadratic
   114  		// approach.
   115  		var res tree.Datum
   116  		for idx := frameStartIdx; idx < frameEndIdx; idx++ {
   117  			if skipped, err := wfr.IsRowSkipped(ctx, idx); err != nil {
   118  				return nil, err
   119  			} else if skipped {
   120  				continue
   121  			}
   122  			args, err := wfr.ArgsByRowIdx(ctx, idx)
   123  			if err != nil {
   124  				return nil, err
   125  			}
   126  			if res == nil {
   127  				res = args[0]
   128  			} else {
   129  				if w.sw.cmp(evalCtx, args[0], res) > 0 {
   130  					res = args[0]
   131  				}
   132  			}
   133  		}
   134  		if res == nil {
   135  			// Spec: the frame is empty, so we return NULL.
   136  			return tree.DNull, nil
   137  		}
   138  		return res, nil
   139  	}
   140  
   141  	// We need to discard all values that are no longer in the frame.
   142  	w.sw.removeAllBefore(frameStartIdx)
   143  
   144  	// We need to add all values that just entered the frame and have not been
   145  	// added yet.
   146  	for idx := max(w.prevEnd, frameStartIdx); idx < frameEndIdx; idx++ {
   147  		if skipped, err := wfr.IsRowSkipped(ctx, idx); err != nil {
   148  			return nil, err
   149  		} else if skipped {
   150  			continue
   151  		}
   152  		args, err := wfr.ArgsByRowIdx(ctx, idx)
   153  		if err != nil {
   154  			return nil, err
   155  		}
   156  		value := args[0]
   157  		if value == tree.DNull {
   158  			// Null value can neither be minimum nor maximum over a window frame with
   159  			// non-null values, so we're not adding them to the sliding window. The
   160  			// case of a window frame with no non-null values is handled below.
   161  			continue
   162  		}
   163  		w.sw.add(&indexedValue{value: value, idx: idx})
   164  	}
   165  	w.prevEnd = frameEndIdx
   166  
   167  	if w.sw.values.Len() == 0 {
   168  		// Spec: the frame is empty, so we return NULL.
   169  		return tree.DNull, nil
   170  	}
   171  
   172  	// The datum with "highest priority" within the frame is at the very front
   173  	// of the deque.
   174  	return w.sw.values.GetFirst().(*indexedValue).value, nil
   175  }
   176  
   177  func max(a, b int) int {
   178  	if a > b {
   179  		return a
   180  	}
   181  	return b
   182  }
   183  
// Reset implements tree.WindowFunc interface. It forgets the previously seen
// frame end and resets the sliding window so that no state from earlier
// Compute calls is carried over.
func (w *slidingWindowFunc) Reset(context.Context) {
	w.prevEnd = 0
	w.sw.reset()
}
   189  
// Close implements WindowFunc interface. It drops the reference to the sliding
// window by setting w.sw to nil; both arguments are ignored.
func (w *slidingWindowFunc) Close(context.Context, *tree.EvalContext) {
	w.sw = nil
}
   194  
// slidingWindowSumFunc applies sliding window approach to summation over
// a frame. It assumes that the frame bounds will never go back, i.e.
// non-decreasing sequences of frame start and frame end indices.
//
// prevStart and prevEnd are the frame bounds recorded by the previous call to
// Compute; they determine which rows have to be subtracted from and added to
// the running sum kept in agg.
type slidingWindowSumFunc struct {
	agg                tree.AggregateFunc // one of the four SumAggregates
	prevStart, prevEnd int

	// lastNonNullIdx is the index of the latest non-null value seen in the
	// sliding window so far. noNonNullSeen indicates non-null values are yet to
	// be seen.
	lastNonNullIdx int
}
   207  
// noNonNullSeen is the value of slidingWindowSumFunc.lastNonNullIdx used while
// no non-null value has been added to the running sum. Being negative, it is
// smaller than any frame start index it is compared against in Compute.
const noNonNullSeen = -1
   209  
   210  func newSlidingWindowSumFunc(agg tree.AggregateFunc) *slidingWindowSumFunc {
   211  	return &slidingWindowSumFunc{
   212  		agg:            agg,
   213  		lastNonNullIdx: noNonNullSeen,
   214  	}
   215  }
   216  
   217  // removeAllBefore subtracts the values from all the rows that are no longer in
   218  // the frame.
   219  func (w *slidingWindowSumFunc) removeAllBefore(
   220  	ctx context.Context, evalCtx *tree.EvalContext, wfr *tree.WindowFrameRun,
   221  ) error {
   222  	frameStartIdx, err := wfr.FrameStartIdx(ctx, evalCtx)
   223  	if err != nil {
   224  		return err
   225  	}
   226  	for idx := w.prevStart; idx < frameStartIdx && idx < w.prevEnd; idx++ {
   227  		if skipped, err := wfr.IsRowSkipped(ctx, idx); err != nil {
   228  			return err
   229  		} else if skipped {
   230  			continue
   231  		}
   232  		args, err := wfr.ArgsByRowIdx(ctx, idx)
   233  		if err != nil {
   234  			return err
   235  		}
   236  		value := args[0]
   237  		if value == tree.DNull {
   238  			// Null values do not contribute to the running sum, so there is nothing
   239  			// to subtract once they leave the window frame.
   240  			continue
   241  		}
   242  		switch v := value.(type) {
   243  		case *tree.DInt:
   244  			err = w.agg.Add(ctx, tree.NewDInt(-*v))
   245  		case *tree.DDecimal:
   246  			d := tree.DDecimal{}
   247  			d.Neg(&v.Decimal)
   248  			err = w.agg.Add(ctx, &d)
   249  		case *tree.DFloat:
   250  			err = w.agg.Add(ctx, tree.NewDFloat(-*v))
   251  		case *tree.DInterval:
   252  			err = w.agg.Add(ctx, &tree.DInterval{Duration: duration.Duration{}.Sub(v.Duration)})
   253  		default:
   254  			err = errors.AssertionFailedf("unexpected value %v", v)
   255  		}
   256  		if err != nil {
   257  			return err
   258  		}
   259  	}
   260  	return nil
   261  }
   262  
   263  // Compute implements WindowFunc interface.
   264  func (w *slidingWindowSumFunc) Compute(
   265  	ctx context.Context, evalCtx *tree.EvalContext, wfr *tree.WindowFrameRun,
   266  ) (tree.Datum, error) {
   267  	frameStartIdx, err := wfr.FrameStartIdx(ctx, evalCtx)
   268  	if err != nil {
   269  		return nil, err
   270  	}
   271  	frameEndIdx, err := wfr.FrameEndIdx(ctx, evalCtx)
   272  	if err != nil {
   273  		return nil, err
   274  	}
   275  	if !wfr.Frame.DefaultFrameExclusion() {
   276  		// We cannot use a sliding window approach because we have a frame
   277  		// exclusion clause - some rows will be in and out of the frame which
   278  		// breaks the necessary assumption, so we fallback to a naive quadratic
   279  		// approach.
   280  		w.agg.Reset(ctx)
   281  		for idx := frameStartIdx; idx < frameEndIdx; idx++ {
   282  			if skipped, err := wfr.IsRowSkipped(ctx, idx); err != nil {
   283  				return nil, err
   284  			} else if skipped {
   285  				continue
   286  			}
   287  			args, err := wfr.ArgsByRowIdx(ctx, idx)
   288  			if err != nil {
   289  				return nil, err
   290  			}
   291  			if err = w.agg.Add(ctx, args[0]); err != nil {
   292  				return nil, err
   293  			}
   294  		}
   295  		return w.agg.Result()
   296  	}
   297  
   298  	// We need to discard all values that are no longer in the frame.
   299  	if err = w.removeAllBefore(ctx, evalCtx, wfr); err != nil {
   300  		return nil, err
   301  	}
   302  
   303  	// We need to sum all values that just entered the frame and have not been
   304  	// added yet.
   305  	for idx := max(w.prevEnd, frameStartIdx); idx < frameEndIdx; idx++ {
   306  		if skipped, err := wfr.IsRowSkipped(ctx, idx); err != nil {
   307  			return nil, err
   308  		} else if skipped {
   309  			continue
   310  		}
   311  		args, err := wfr.ArgsByRowIdx(ctx, idx)
   312  		if err != nil {
   313  			return nil, err
   314  		}
   315  		if args[0] != tree.DNull {
   316  			w.lastNonNullIdx = idx
   317  			err = w.agg.Add(ctx, args[0])
   318  			if err != nil {
   319  				return nil, err
   320  			}
   321  		}
   322  	}
   323  
   324  	w.prevStart = frameStartIdx
   325  	w.prevEnd = frameEndIdx
   326  	// If last non-null value has index smaller than the start of the window
   327  	// frame, then only nulls can be in the frame. This holds true as well for
   328  	// the special noNonNullsSeen index.
   329  	onlyNulls := w.lastNonNullIdx < frameStartIdx
   330  	if frameStartIdx == frameEndIdx || onlyNulls {
   331  		// Either the window frame is empty or only null values are in the frame,
   332  		// so we return NULL as per spec.
   333  		return tree.DNull, nil
   334  	}
   335  	return w.agg.Result()
   336  }
   337  
// Reset implements tree.WindowFunc interface. It restores the recorded frame
// bounds and the last-non-null marker to their initial values and resets the
// wrapped aggregate.
func (w *slidingWindowSumFunc) Reset(ctx context.Context) {
	w.prevStart = 0
	w.prevEnd = 0
	w.lastNonNullIdx = noNonNullSeen
	w.agg.Reset(ctx)
}
   345  
// Close implements WindowFunc interface. It closes the wrapped aggregate; the
// evaluation context argument is unused.
func (w *slidingWindowSumFunc) Close(ctx context.Context, _ *tree.EvalContext) {
	w.agg.Close(ctx)
}
   350  
// avgWindowFunc uses slidingWindowSumFunc to compute average over a frame.
// The sum comes from the wrapped slidingWindowSumFunc; Compute divides it by
// the number of non-skipped, non-null rows in the frame.
type avgWindowFunc struct {
	sum *slidingWindowSumFunc
}
   355  
   356  // Compute implements WindowFunc interface.
   357  func (w *avgWindowFunc) Compute(
   358  	ctx context.Context, evalCtx *tree.EvalContext, wfr *tree.WindowFrameRun,
   359  ) (tree.Datum, error) {
   360  	sum, err := w.sum.Compute(ctx, evalCtx, wfr)
   361  	if err != nil {
   362  		return nil, err
   363  	}
   364  	if sum == tree.DNull {
   365  		// Spec: the frame is empty, so we return NULL.
   366  		return tree.DNull, nil
   367  	}
   368  
   369  	frameSize := 0
   370  	frameStartIdx, err := wfr.FrameStartIdx(ctx, evalCtx)
   371  	if err != nil {
   372  		return nil, err
   373  	}
   374  	frameEndIdx, err := wfr.FrameEndIdx(ctx, evalCtx)
   375  	if err != nil {
   376  		return nil, err
   377  	}
   378  	for idx := frameStartIdx; idx < frameEndIdx; idx++ {
   379  		if skipped, err := wfr.IsRowSkipped(ctx, idx); err != nil {
   380  			return nil, err
   381  		} else if skipped {
   382  			continue
   383  		}
   384  		args, err := wfr.ArgsByRowIdx(ctx, idx)
   385  		if err != nil {
   386  			return nil, err
   387  		}
   388  		if args[0] == tree.DNull {
   389  			// Null values do not count towards the number of rows that contribute
   390  			// to the sum, so we're omitting them from the frame.
   391  			continue
   392  		}
   393  		frameSize++
   394  	}
   395  
   396  	switch t := sum.(type) {
   397  	case *tree.DFloat:
   398  		return tree.NewDFloat(*t / tree.DFloat(frameSize)), nil
   399  	case *tree.DDecimal:
   400  		var avg tree.DDecimal
   401  		count := apd.New(int64(frameSize), 0)
   402  		_, err := tree.DecimalCtx.Quo(&avg.Decimal, &t.Decimal, count)
   403  		return &avg, err
   404  	case *tree.DInt:
   405  		dd := tree.DDecimal{}
   406  		dd.SetFinite(int64(*t), 0)
   407  		var avg tree.DDecimal
   408  		count := apd.New(int64(frameSize), 0)
   409  		_, err := tree.DecimalCtx.Quo(&avg.Decimal, &dd.Decimal, count)
   410  		return &avg, err
   411  	case *tree.DInterval:
   412  		return &tree.DInterval{Duration: t.Duration.Div(int64(frameSize))}, nil
   413  	default:
   414  		return nil, errors.AssertionFailedf("unexpected SUM result type: %s", t)
   415  	}
   416  }
   417  
// Reset implements tree.WindowFunc interface by resetting the wrapped
// slidingWindowSumFunc.
func (w *avgWindowFunc) Reset(ctx context.Context) {
	w.sum.Reset(ctx)
}
   422  
// Close implements WindowFunc interface by closing the wrapped
// slidingWindowSumFunc.
func (w *avgWindowFunc) Close(ctx context.Context, evalCtx *tree.EvalContext) {
	w.sum.Close(ctx, evalCtx)
}