github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/sem/builtins/window_frame_builtins.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package builtins 12 13 import ( 14 "context" 15 "fmt" 16 "strings" 17 18 "github.com/cockroachdb/apd" 19 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 20 "github.com/cockroachdb/cockroach/pkg/util/duration" 21 "github.com/cockroachdb/cockroach/pkg/util/ring" 22 "github.com/cockroachdb/errors" 23 ) 24 25 // indexedValue combines a value from the row with the index of that row. 26 type indexedValue struct { 27 value tree.Datum 28 idx int 29 } 30 31 // slidingWindow maintains a deque of values along with corresponding indices 32 // based on cmp function: 33 // for Min behavior, cmp = -a.Compare(b) 34 // for Max behavior, cmp = a.Compare(b) 35 // 36 // It assumes that the frame bounds will never go back, i.e. non-decreasing 37 // sequences of frame start and frame end indices. 38 type slidingWindow struct { 39 values ring.Buffer 40 evalCtx *tree.EvalContext 41 cmp func(*tree.EvalContext, tree.Datum, tree.Datum) int 42 } 43 44 func makeSlidingWindow( 45 evalCtx *tree.EvalContext, cmp func(*tree.EvalContext, tree.Datum, tree.Datum) int, 46 ) *slidingWindow { 47 return &slidingWindow{ 48 evalCtx: evalCtx, 49 cmp: cmp, 50 } 51 } 52 53 // add first removes all values that are "smaller or equal" (depending on cmp) 54 // from the end of the deque and then appends 'iv' to the end. This way, the 55 // deque always contains unique values sorted in descending order of their 56 // "priority" (when we encounter duplicates, we always keep the one with the 57 // largest idx). 58 func (sw *slidingWindow) add(iv *indexedValue) { 59 for i := sw.values.Len() - 1; i >= 0; i-- { 60 if sw.cmp(sw.evalCtx, sw.values.Get(i).(*indexedValue).value, iv.value) > 0 { 61 break 62 } 63 sw.values.RemoveLast() 64 } 65 sw.values.AddLast(iv) 66 } 67 68 // removeAllBefore removes all values from the beginning of the deque that have 69 // indices smaller than given 'idx'. This operation corresponds to shifting the 70 // start of the frame up to 'idx'. 71 func (sw *slidingWindow) removeAllBefore(idx int) { 72 for i := 0; i < sw.values.Len() && i < idx; i++ { 73 if sw.values.Get(i).(*indexedValue).idx >= idx { 74 break 75 } 76 sw.values.RemoveFirst() 77 } 78 } 79 80 func (sw *slidingWindow) string() string { 81 var builder strings.Builder 82 for i := 0; i < sw.values.Len(); i++ { 83 builder.WriteString(fmt.Sprintf("(%v, %v)\t", sw.values.Get(i).(*indexedValue).value, sw.values.Get(i).(*indexedValue).idx)) 84 } 85 return builder.String() 86 } 87 88 func (sw *slidingWindow) reset() { 89 sw.values.Reset() 90 } 91 92 type slidingWindowFunc struct { 93 sw *slidingWindow 94 prevEnd int 95 } 96 97 // Compute implements WindowFunc interface. 98 func (w *slidingWindowFunc) Compute( 99 ctx context.Context, evalCtx *tree.EvalContext, wfr *tree.WindowFrameRun, 100 ) (tree.Datum, error) { 101 frameStartIdx, err := wfr.FrameStartIdx(ctx, evalCtx) 102 if err != nil { 103 return nil, err 104 } 105 frameEndIdx, err := wfr.FrameEndIdx(ctx, evalCtx) 106 if err != nil { 107 return nil, err 108 } 109 110 if !wfr.Frame.DefaultFrameExclusion() { 111 // We cannot use a sliding window approach because we have a frame 112 // exclusion clause - some rows will be in and out of the frame which 113 // breaks the necessary assumption, so we fallback to a naive quadratic 114 // approach. 115 var res tree.Datum 116 for idx := frameStartIdx; idx < frameEndIdx; idx++ { 117 if skipped, err := wfr.IsRowSkipped(ctx, idx); err != nil { 118 return nil, err 119 } else if skipped { 120 continue 121 } 122 args, err := wfr.ArgsByRowIdx(ctx, idx) 123 if err != nil { 124 return nil, err 125 } 126 if res == nil { 127 res = args[0] 128 } else { 129 if w.sw.cmp(evalCtx, args[0], res) > 0 { 130 res = args[0] 131 } 132 } 133 } 134 if res == nil { 135 // Spec: the frame is empty, so we return NULL. 136 return tree.DNull, nil 137 } 138 return res, nil 139 } 140 141 // We need to discard all values that are no longer in the frame. 142 w.sw.removeAllBefore(frameStartIdx) 143 144 // We need to add all values that just entered the frame and have not been 145 // added yet. 146 for idx := max(w.prevEnd, frameStartIdx); idx < frameEndIdx; idx++ { 147 if skipped, err := wfr.IsRowSkipped(ctx, idx); err != nil { 148 return nil, err 149 } else if skipped { 150 continue 151 } 152 args, err := wfr.ArgsByRowIdx(ctx, idx) 153 if err != nil { 154 return nil, err 155 } 156 value := args[0] 157 if value == tree.DNull { 158 // Null value can neither be minimum nor maximum over a window frame with 159 // non-null values, so we're not adding them to the sliding window. The 160 // case of a window frame with no non-null values is handled below. 161 continue 162 } 163 w.sw.add(&indexedValue{value: value, idx: idx}) 164 } 165 w.prevEnd = frameEndIdx 166 167 if w.sw.values.Len() == 0 { 168 // Spec: the frame is empty, so we return NULL. 169 return tree.DNull, nil 170 } 171 172 // The datum with "highest priority" within the frame is at the very front 173 // of the deque. 174 return w.sw.values.GetFirst().(*indexedValue).value, nil 175 } 176 177 func max(a, b int) int { 178 if a > b { 179 return a 180 } 181 return b 182 } 183 184 // Reset implements tree.WindowFunc interface. 185 func (w *slidingWindowFunc) Reset(context.Context) { 186 w.prevEnd = 0 187 w.sw.reset() 188 } 189 190 // Close implements WindowFunc interface. 191 func (w *slidingWindowFunc) Close(context.Context, *tree.EvalContext) { 192 w.sw = nil 193 } 194 195 // slidingWindowSumFunc applies sliding window approach to summation over 196 // a frame. It assumes that the frame bounds will never go back, i.e. 197 // non-decreasing sequences of frame start and frame end indices. 198 type slidingWindowSumFunc struct { 199 agg tree.AggregateFunc // one of the four SumAggregates 200 prevStart, prevEnd int 201 202 // lastNonNullIdx is the index of the latest non-null value seen in the 203 // sliding window so far. noNonNullSeen indicates non-null values are yet to 204 // be seen. 205 lastNonNullIdx int 206 } 207 208 const noNonNullSeen = -1 209 210 func newSlidingWindowSumFunc(agg tree.AggregateFunc) *slidingWindowSumFunc { 211 return &slidingWindowSumFunc{ 212 agg: agg, 213 lastNonNullIdx: noNonNullSeen, 214 } 215 } 216 217 // removeAllBefore subtracts the values from all the rows that are no longer in 218 // the frame. 219 func (w *slidingWindowSumFunc) removeAllBefore( 220 ctx context.Context, evalCtx *tree.EvalContext, wfr *tree.WindowFrameRun, 221 ) error { 222 frameStartIdx, err := wfr.FrameStartIdx(ctx, evalCtx) 223 if err != nil { 224 return err 225 } 226 for idx := w.prevStart; idx < frameStartIdx && idx < w.prevEnd; idx++ { 227 if skipped, err := wfr.IsRowSkipped(ctx, idx); err != nil { 228 return err 229 } else if skipped { 230 continue 231 } 232 args, err := wfr.ArgsByRowIdx(ctx, idx) 233 if err != nil { 234 return err 235 } 236 value := args[0] 237 if value == tree.DNull { 238 // Null values do not contribute to the running sum, so there is nothing 239 // to subtract once they leave the window frame. 240 continue 241 } 242 switch v := value.(type) { 243 case *tree.DInt: 244 err = w.agg.Add(ctx, tree.NewDInt(-*v)) 245 case *tree.DDecimal: 246 d := tree.DDecimal{} 247 d.Neg(&v.Decimal) 248 err = w.agg.Add(ctx, &d) 249 case *tree.DFloat: 250 err = w.agg.Add(ctx, tree.NewDFloat(-*v)) 251 case *tree.DInterval: 252 err = w.agg.Add(ctx, &tree.DInterval{Duration: duration.Duration{}.Sub(v.Duration)}) 253 default: 254 err = errors.AssertionFailedf("unexpected value %v", v) 255 } 256 if err != nil { 257 return err 258 } 259 } 260 return nil 261 } 262 263 // Compute implements WindowFunc interface. 264 func (w *slidingWindowSumFunc) Compute( 265 ctx context.Context, evalCtx *tree.EvalContext, wfr *tree.WindowFrameRun, 266 ) (tree.Datum, error) { 267 frameStartIdx, err := wfr.FrameStartIdx(ctx, evalCtx) 268 if err != nil { 269 return nil, err 270 } 271 frameEndIdx, err := wfr.FrameEndIdx(ctx, evalCtx) 272 if err != nil { 273 return nil, err 274 } 275 if !wfr.Frame.DefaultFrameExclusion() { 276 // We cannot use a sliding window approach because we have a frame 277 // exclusion clause - some rows will be in and out of the frame which 278 // breaks the necessary assumption, so we fallback to a naive quadratic 279 // approach. 280 w.agg.Reset(ctx) 281 for idx := frameStartIdx; idx < frameEndIdx; idx++ { 282 if skipped, err := wfr.IsRowSkipped(ctx, idx); err != nil { 283 return nil, err 284 } else if skipped { 285 continue 286 } 287 args, err := wfr.ArgsByRowIdx(ctx, idx) 288 if err != nil { 289 return nil, err 290 } 291 if err = w.agg.Add(ctx, args[0]); err != nil { 292 return nil, err 293 } 294 } 295 return w.agg.Result() 296 } 297 298 // We need to discard all values that are no longer in the frame. 299 if err = w.removeAllBefore(ctx, evalCtx, wfr); err != nil { 300 return nil, err 301 } 302 303 // We need to sum all values that just entered the frame and have not been 304 // added yet. 305 for idx := max(w.prevEnd, frameStartIdx); idx < frameEndIdx; idx++ { 306 if skipped, err := wfr.IsRowSkipped(ctx, idx); err != nil { 307 return nil, err 308 } else if skipped { 309 continue 310 } 311 args, err := wfr.ArgsByRowIdx(ctx, idx) 312 if err != nil { 313 return nil, err 314 } 315 if args[0] != tree.DNull { 316 w.lastNonNullIdx = idx 317 err = w.agg.Add(ctx, args[0]) 318 if err != nil { 319 return nil, err 320 } 321 } 322 } 323 324 w.prevStart = frameStartIdx 325 w.prevEnd = frameEndIdx 326 // If last non-null value has index smaller than the start of the window 327 // frame, then only nulls can be in the frame. This holds true as well for 328 // the special noNonNullsSeen index. 329 onlyNulls := w.lastNonNullIdx < frameStartIdx 330 if frameStartIdx == frameEndIdx || onlyNulls { 331 // Either the window frame is empty or only null values are in the frame, 332 // so we return NULL as per spec. 333 return tree.DNull, nil 334 } 335 return w.agg.Result() 336 } 337 338 // Reset implements tree.WindowFunc interface. 339 func (w *slidingWindowSumFunc) Reset(ctx context.Context) { 340 w.prevStart = 0 341 w.prevEnd = 0 342 w.lastNonNullIdx = noNonNullSeen 343 w.agg.Reset(ctx) 344 } 345 346 // Close implements WindowFunc interface. 347 func (w *slidingWindowSumFunc) Close(ctx context.Context, _ *tree.EvalContext) { 348 w.agg.Close(ctx) 349 } 350 351 // avgWindowFunc uses slidingWindowSumFunc to compute average over a frame. 352 type avgWindowFunc struct { 353 sum *slidingWindowSumFunc 354 } 355 356 // Compute implements WindowFunc interface. 357 func (w *avgWindowFunc) Compute( 358 ctx context.Context, evalCtx *tree.EvalContext, wfr *tree.WindowFrameRun, 359 ) (tree.Datum, error) { 360 sum, err := w.sum.Compute(ctx, evalCtx, wfr) 361 if err != nil { 362 return nil, err 363 } 364 if sum == tree.DNull { 365 // Spec: the frame is empty, so we return NULL. 366 return tree.DNull, nil 367 } 368 369 frameSize := 0 370 frameStartIdx, err := wfr.FrameStartIdx(ctx, evalCtx) 371 if err != nil { 372 return nil, err 373 } 374 frameEndIdx, err := wfr.FrameEndIdx(ctx, evalCtx) 375 if err != nil { 376 return nil, err 377 } 378 for idx := frameStartIdx; idx < frameEndIdx; idx++ { 379 if skipped, err := wfr.IsRowSkipped(ctx, idx); err != nil { 380 return nil, err 381 } else if skipped { 382 continue 383 } 384 args, err := wfr.ArgsByRowIdx(ctx, idx) 385 if err != nil { 386 return nil, err 387 } 388 if args[0] == tree.DNull { 389 // Null values do not count towards the number of rows that contribute 390 // to the sum, so we're omitting them from the frame. 391 continue 392 } 393 frameSize++ 394 } 395 396 switch t := sum.(type) { 397 case *tree.DFloat: 398 return tree.NewDFloat(*t / tree.DFloat(frameSize)), nil 399 case *tree.DDecimal: 400 var avg tree.DDecimal 401 count := apd.New(int64(frameSize), 0) 402 _, err := tree.DecimalCtx.Quo(&avg.Decimal, &t.Decimal, count) 403 return &avg, err 404 case *tree.DInt: 405 dd := tree.DDecimal{} 406 dd.SetFinite(int64(*t), 0) 407 var avg tree.DDecimal 408 count := apd.New(int64(frameSize), 0) 409 _, err := tree.DecimalCtx.Quo(&avg.Decimal, &dd.Decimal, count) 410 return &avg, err 411 case *tree.DInterval: 412 return &tree.DInterval{Duration: t.Duration.Div(int64(frameSize))}, nil 413 default: 414 return nil, errors.AssertionFailedf("unexpected SUM result type: %s", t) 415 } 416 } 417 418 // Reset implements tree.WindowFunc interface. 419 func (w *avgWindowFunc) Reset(ctx context.Context) { 420 w.sum.Reset(ctx) 421 } 422 423 // Close implements WindowFunc interface. 424 func (w *avgWindowFunc) Close(ctx context.Context, evalCtx *tree.EvalContext) { 425 w.sum.Close(ctx, evalCtx) 426 }