github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/sem/tree/window_funcs_util.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package tree
    12  
    13  import "github.com/cockroachdb/cockroach/pkg/util/ring"
    14  
    15  // PeerGroupChecker can check if a pair of row indices within a partition are
    16  // in the same peer group. It also returns an error if it occurs while checking
    17  // the peer groups.
    18  type PeerGroupChecker interface {
    19  	InSameGroup(i, j int) (bool, error)
    20  }
    21  
    22  // peerGroup contains information about a single peer group.
    23  type peerGroup struct {
    24  	firstPeerIdx int
    25  	rowCount     int
    26  }
    27  
    28  // PeerGroupsIndicesHelper computes peer groups using the given
    29  // PeerGroupChecker. In ROWS and RANGE modes, it processes one peer group at
    30  // a time and stores information only about single peer group. In GROUPS mode,
    31  // it's behavior depends on the frame bounds; in the worst case, it stores
    32  // max(F, O) peer groups at the same time, where F is the maximum number of
    33  // peer groups within the frame at any point and O is the maximum of two
    34  // offsets if we have OFFSET_FOLLOWING type of bound (both F and O are
    35  // upper-bounded by total number of peer groups).
    36  type PeerGroupsIndicesHelper struct {
    37  	groups               ring.Buffer // queue of peer groups
    38  	peerGrouper          PeerGroupChecker
    39  	headPeerGroupNum     int  // number of the peer group at the head of the queue
    40  	allPeerGroupsSkipped bool // in GROUP mode, indicates whether all peer groups were skipped during Init
    41  	allRowsProcessed     bool // indicates whether peer groups for all rows within partition have been already computed
    42  	unboundedFollowing   int  // index of the first row after all rows of the partition
    43  }
    44  
    45  // Init computes all peer groups necessary to perform calculations of a window
    46  // function over the first row of the partition. It returns any error if it
    47  // occurs.
    48  func (p *PeerGroupsIndicesHelper) Init(wfr *WindowFrameRun, peerGrouper PeerGroupChecker) error {
    49  	// We first reset the helper to reuse the same one for all partitions when
    50  	// computing a particular window function.
    51  	p.groups.Reset()
    52  	p.headPeerGroupNum = 0
    53  	p.allPeerGroupsSkipped = false
    54  	p.allRowsProcessed = false
    55  	p.unboundedFollowing = wfr.unboundedFollowing()
    56  
    57  	var group *peerGroup
    58  	p.peerGrouper = peerGrouper
    59  	startIdxOfFirstPeerGroupWithinFrame := 0
    60  	if wfr.Frame != nil && wfr.Frame.Mode == GROUPS && wfr.Frame.Bounds.StartBound.BoundType == OffsetFollowing {
    61  		// In GROUPS mode with OFFSET_FOLLOWING as a start bound, 'peerGroupOffset'
    62  		// number of peer groups needs to be processed upfront before we get to
    63  		// peer groups that will be within a frame of the first row.
    64  		// If start bound is of type:
    65  		// - UNBOUNDED_PRECEDING - we don't use this helper at all
    66  		// - OFFSET_PRECEDING - no need to process any peer groups upfront
    67  		// - CURRENT_ROW - no need to process any peer groups upfront
    68  		// - OFFSET_FOLLOWING - processing is done here
    69  		// - UNBOUNDED_FOLLOWING - invalid as a start bound
    70  		//
    71  		// We also cannot simply discard information about these peer groups: even
    72  		// though they will never be within frames of any rows, we still might need
    73  		// information about them. For example, with frame as follows:
    74  		//   GROUPS BETWEEN 1 FOLLOWING AND 3 FOLLOWING
    75  		// when processing the rows from zeroth peer group, we will need to know
    76  		// where zeroth peer group starts and how many rows it has, but the rows of
    77  		// zeroth group will never be in any frame.
    78  		peerGroupOffset := int(MustBeDInt(wfr.StartBoundOffset))
    79  		group = &peerGroup{firstPeerIdx: 0, rowCount: 1}
    80  		for group.firstPeerIdx < wfr.PartitionSize() && p.groups.Len() < peerGroupOffset {
    81  			p.groups.AddLast(group)
    82  			for ; group.firstPeerIdx+group.rowCount < wfr.PartitionSize(); group.rowCount++ {
    83  				idx := group.firstPeerIdx + group.rowCount
    84  				if sameGroup, err := p.peerGrouper.InSameGroup(idx-1, idx); err != nil {
    85  					return err
    86  				} else if !sameGroup {
    87  					break
    88  				}
    89  			}
    90  			group = &peerGroup{firstPeerIdx: group.firstPeerIdx + group.rowCount, rowCount: 1}
    91  		}
    92  
    93  		if group.firstPeerIdx == wfr.PartitionSize() {
    94  			// Frame starts after all peer groups of the partition.
    95  			p.allPeerGroupsSkipped = true
    96  			return nil
    97  		}
    98  
    99  		startIdxOfFirstPeerGroupWithinFrame = group.firstPeerIdx
   100  	}
   101  
   102  	// Compute the first peer group that is within the frame.
   103  	group = &peerGroup{firstPeerIdx: startIdxOfFirstPeerGroupWithinFrame, rowCount: 1}
   104  	p.groups.AddLast(group)
   105  	for ; group.firstPeerIdx+group.rowCount < wfr.PartitionSize(); group.rowCount++ {
   106  		idx := group.firstPeerIdx + group.rowCount
   107  		if sameGroup, err := p.peerGrouper.InSameGroup(idx-1, idx); err != nil {
   108  			return err
   109  		} else if !sameGroup {
   110  			break
   111  		}
   112  	}
   113  	if group.firstPeerIdx+group.rowCount == wfr.PartitionSize() {
   114  		p.allRowsProcessed = true
   115  		return nil
   116  	}
   117  
   118  	if wfr.Frame != nil && wfr.Frame.Mode == GROUPS && wfr.Frame.Bounds.EndBound != nil && wfr.Frame.Bounds.EndBound.BoundType == OffsetFollowing {
   119  		// In GROUPS mode, 'peerGroupOffset' number of peer groups need to be
   120  		// processed upfront because they are within the frame of the first row.
   121  		// If end bound is of type:
   122  		// - UNBOUNDED_PRECEDING - invalid as an end bound
   123  		// - OFFSET_PRECEDING - no need to process any peer groups upfront
   124  		// - CURRENT_ROW - no need to process any more peer groups upfront
   125  		// - OFFSET_FOLLOWING - processing is done here
   126  		// - UNBOUNDED_FOLLOWING - we don't use this helper at all
   127  		peerGroupOffset := int(MustBeDInt(wfr.EndBoundOffset))
   128  		group = &peerGroup{firstPeerIdx: group.firstPeerIdx + group.rowCount, rowCount: 1}
   129  		for group.firstPeerIdx < wfr.PartitionSize() && p.groups.Len() <= peerGroupOffset {
   130  			p.groups.AddLast(group)
   131  			for ; group.firstPeerIdx+group.rowCount < wfr.PartitionSize(); group.rowCount++ {
   132  				idx := group.firstPeerIdx + group.rowCount
   133  				if sameGroup, err := p.peerGrouper.InSameGroup(idx-1, idx); err != nil {
   134  					return err
   135  				} else if !sameGroup {
   136  					break
   137  				}
   138  			}
   139  			group = &peerGroup{firstPeerIdx: group.firstPeerIdx + group.rowCount, rowCount: 1}
   140  		}
   141  		if group.firstPeerIdx == wfr.PartitionSize() {
   142  			p.allRowsProcessed = true
   143  		}
   144  	}
   145  	return nil
   146  }
   147  
   148  // Update should be called after a window function has been computed over all
   149  // rows in wfr.CurRowPeerGroupNum peer group. If not all rows have been already
   150  // processed, it computes the next peer group. It returns any error if it
   151  // occurs.
   152  func (p *PeerGroupsIndicesHelper) Update(wfr *WindowFrameRun) error {
   153  	if p.allPeerGroupsSkipped {
   154  		// No peer groups to process.
   155  		return nil
   156  	}
   157  
   158  	// nextPeerGroupStartIdx is the index of the first row that we haven't
   159  	// computed peer group for.
   160  	lastPeerGroup := p.groups.GetLast().(*peerGroup)
   161  	nextPeerGroupStartIdx := lastPeerGroup.firstPeerIdx + lastPeerGroup.rowCount
   162  
   163  	if (wfr.Frame == nil || wfr.Frame.Mode == ROWS || wfr.Frame.Mode == RANGE) ||
   164  		(wfr.Frame.Bounds.StartBound.BoundType == OffsetPreceding && wfr.CurRowPeerGroupNum-p.headPeerGroupNum > int(MustBeDInt(wfr.StartBoundOffset)) ||
   165  			wfr.Frame.Bounds.StartBound.BoundType == CurrentRow ||
   166  			(wfr.Frame.Bounds.StartBound.BoundType == OffsetFollowing && p.headPeerGroupNum-wfr.CurRowPeerGroupNum > int(MustBeDInt(wfr.StartBoundOffset)))) {
   167  		// With default frame, ROWS or RANGE mode, we want to "discard" the only
   168  		// peer group that we're storing information about. In GROUPS mode, with
   169  		// start bound of type:
   170  		// - OFFSET_PRECEDING we want to start discarding the "earliest" peer group
   171  		//   only when the number of current row's peer group differs from the
   172  		//   number of the earliest one by more than offset
   173  		// - CURRENT_ROW we want to discard the earliest peer group
   174  		// - OFFSET_FOLLOWING we want to start discarding the "earliest" peer group
   175  		//	 only when the number of current row's peer group differs from the
   176  		//	 number of the earliest one by more than offset
   177  		p.groups.RemoveFirst()
   178  		p.headPeerGroupNum++
   179  	}
   180  
   181  	if p.allRowsProcessed {
   182  		// No more peer groups to process.
   183  		return nil
   184  	}
   185  
   186  	// Compute the next peer group that is just entering the frame.
   187  	peerGroup := &peerGroup{firstPeerIdx: nextPeerGroupStartIdx, rowCount: 1}
   188  	p.groups.AddLast(peerGroup)
   189  	for ; peerGroup.firstPeerIdx+peerGroup.rowCount < wfr.PartitionSize(); peerGroup.rowCount++ {
   190  		idx := peerGroup.firstPeerIdx + peerGroup.rowCount
   191  		if sameGroup, err := p.peerGrouper.InSameGroup(idx-1, idx); err != nil {
   192  			return err
   193  		} else if !sameGroup {
   194  			break
   195  		}
   196  	}
   197  	if peerGroup.firstPeerIdx+peerGroup.rowCount == wfr.PartitionSize() {
   198  		p.allRowsProcessed = true
   199  	}
   200  	return nil
   201  }
   202  
   203  // GetFirstPeerIdx returns index of the first peer within peer group of number
   204  // peerGroupNum (counting from 0).
   205  func (p *PeerGroupsIndicesHelper) GetFirstPeerIdx(peerGroupNum int) int {
   206  	posInBuffer := peerGroupNum - p.headPeerGroupNum
   207  	if posInBuffer < 0 || p.groups.Len() < posInBuffer {
   208  		panic("peerGroupNum out of bounds")
   209  	}
   210  	return p.groups.Get(posInBuffer).(*peerGroup).firstPeerIdx
   211  }
   212  
   213  // GetRowCount returns the number of rows within peer group of number
   214  // peerGroupNum (counting from 0).
   215  func (p *PeerGroupsIndicesHelper) GetRowCount(peerGroupNum int) int {
   216  	posInBuffer := peerGroupNum - p.headPeerGroupNum
   217  	if posInBuffer < 0 || p.groups.Len() < posInBuffer {
   218  		panic("peerGroupNum out of bounds")
   219  	}
   220  	return p.groups.Get(posInBuffer).(*peerGroup).rowCount
   221  }
   222  
   223  // GetLastPeerGroupNum returns the number of the last peer group in the queue.
   224  func (p *PeerGroupsIndicesHelper) GetLastPeerGroupNum() int {
   225  	if p.groups.Len() == 0 {
   226  		panic("GetLastPeerGroupNum on empty RingBuffer")
   227  	}
   228  	return p.headPeerGroupNum + p.groups.Len() - 1
   229  }