github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/sem/tree/window_funcs_util.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package tree 12 13 import "github.com/cockroachdb/cockroach/pkg/util/ring" 14 15 // PeerGroupChecker can check if a pair of row indices within a partition are 16 // in the same peer group. It also returns an error if it occurs while checking 17 // the peer groups. 18 type PeerGroupChecker interface { 19 InSameGroup(i, j int) (bool, error) 20 } 21 22 // peerGroup contains information about a single peer group. 23 type peerGroup struct { 24 firstPeerIdx int 25 rowCount int 26 } 27 28 // PeerGroupsIndicesHelper computes peer groups using the given 29 // PeerGroupChecker. In ROWS and RANGE modes, it processes one peer group at 30 // a time and stores information only about single peer group. In GROUPS mode, 31 // it's behavior depends on the frame bounds; in the worst case, it stores 32 // max(F, O) peer groups at the same time, where F is the maximum number of 33 // peer groups within the frame at any point and O is the maximum of two 34 // offsets if we have OFFSET_FOLLOWING type of bound (both F and O are 35 // upper-bounded by total number of peer groups). 36 type PeerGroupsIndicesHelper struct { 37 groups ring.Buffer // queue of peer groups 38 peerGrouper PeerGroupChecker 39 headPeerGroupNum int // number of the peer group at the head of the queue 40 allPeerGroupsSkipped bool // in GROUP mode, indicates whether all peer groups were skipped during Init 41 allRowsProcessed bool // indicates whether peer groups for all rows within partition have been already computed 42 unboundedFollowing int // index of the first row after all rows of the partition 43 } 44 45 // Init computes all peer groups necessary to perform calculations of a window 46 // function over the first row of the partition. It returns any error if it 47 // occurs. 48 func (p *PeerGroupsIndicesHelper) Init(wfr *WindowFrameRun, peerGrouper PeerGroupChecker) error { 49 // We first reset the helper to reuse the same one for all partitions when 50 // computing a particular window function. 51 p.groups.Reset() 52 p.headPeerGroupNum = 0 53 p.allPeerGroupsSkipped = false 54 p.allRowsProcessed = false 55 p.unboundedFollowing = wfr.unboundedFollowing() 56 57 var group *peerGroup 58 p.peerGrouper = peerGrouper 59 startIdxOfFirstPeerGroupWithinFrame := 0 60 if wfr.Frame != nil && wfr.Frame.Mode == GROUPS && wfr.Frame.Bounds.StartBound.BoundType == OffsetFollowing { 61 // In GROUPS mode with OFFSET_FOLLOWING as a start bound, 'peerGroupOffset' 62 // number of peer groups needs to be processed upfront before we get to 63 // peer groups that will be within a frame of the first row. 64 // If start bound is of type: 65 // - UNBOUNDED_PRECEDING - we don't use this helper at all 66 // - OFFSET_PRECEDING - no need to process any peer groups upfront 67 // - CURRENT_ROW - no need to process any peer groups upfront 68 // - OFFSET_FOLLOWING - processing is done here 69 // - UNBOUNDED_FOLLOWING - invalid as a start bound 70 // 71 // We also cannot simply discard information about these peer groups: even 72 // though they will never be within frames of any rows, we still might need 73 // information about them. For example, with frame as follows: 74 // GROUPS BETWEEN 1 FOLLOWING AND 3 FOLLOWING 75 // when processing the rows from zeroth peer group, we will need to know 76 // where zeroth peer group starts and how many rows it has, but the rows of 77 // zeroth group will never be in any frame. 78 peerGroupOffset := int(MustBeDInt(wfr.StartBoundOffset)) 79 group = &peerGroup{firstPeerIdx: 0, rowCount: 1} 80 for group.firstPeerIdx < wfr.PartitionSize() && p.groups.Len() < peerGroupOffset { 81 p.groups.AddLast(group) 82 for ; group.firstPeerIdx+group.rowCount < wfr.PartitionSize(); group.rowCount++ { 83 idx := group.firstPeerIdx + group.rowCount 84 if sameGroup, err := p.peerGrouper.InSameGroup(idx-1, idx); err != nil { 85 return err 86 } else if !sameGroup { 87 break 88 } 89 } 90 group = &peerGroup{firstPeerIdx: group.firstPeerIdx + group.rowCount, rowCount: 1} 91 } 92 93 if group.firstPeerIdx == wfr.PartitionSize() { 94 // Frame starts after all peer groups of the partition. 95 p.allPeerGroupsSkipped = true 96 return nil 97 } 98 99 startIdxOfFirstPeerGroupWithinFrame = group.firstPeerIdx 100 } 101 102 // Compute the first peer group that is within the frame. 103 group = &peerGroup{firstPeerIdx: startIdxOfFirstPeerGroupWithinFrame, rowCount: 1} 104 p.groups.AddLast(group) 105 for ; group.firstPeerIdx+group.rowCount < wfr.PartitionSize(); group.rowCount++ { 106 idx := group.firstPeerIdx + group.rowCount 107 if sameGroup, err := p.peerGrouper.InSameGroup(idx-1, idx); err != nil { 108 return err 109 } else if !sameGroup { 110 break 111 } 112 } 113 if group.firstPeerIdx+group.rowCount == wfr.PartitionSize() { 114 p.allRowsProcessed = true 115 return nil 116 } 117 118 if wfr.Frame != nil && wfr.Frame.Mode == GROUPS && wfr.Frame.Bounds.EndBound != nil && wfr.Frame.Bounds.EndBound.BoundType == OffsetFollowing { 119 // In GROUPS mode, 'peerGroupOffset' number of peer groups need to be 120 // processed upfront because they are within the frame of the first row. 121 // If end bound is of type: 122 // - UNBOUNDED_PRECEDING - invalid as an end bound 123 // - OFFSET_PRECEDING - no need to process any peer groups upfront 124 // - CURRENT_ROW - no need to process any more peer groups upfront 125 // - OFFSET_FOLLOWING - processing is done here 126 // - UNBOUNDED_FOLLOWING - we don't use this helper at all 127 peerGroupOffset := int(MustBeDInt(wfr.EndBoundOffset)) 128 group = &peerGroup{firstPeerIdx: group.firstPeerIdx + group.rowCount, rowCount: 1} 129 for group.firstPeerIdx < wfr.PartitionSize() && p.groups.Len() <= peerGroupOffset { 130 p.groups.AddLast(group) 131 for ; group.firstPeerIdx+group.rowCount < wfr.PartitionSize(); group.rowCount++ { 132 idx := group.firstPeerIdx + group.rowCount 133 if sameGroup, err := p.peerGrouper.InSameGroup(idx-1, idx); err != nil { 134 return err 135 } else if !sameGroup { 136 break 137 } 138 } 139 group = &peerGroup{firstPeerIdx: group.firstPeerIdx + group.rowCount, rowCount: 1} 140 } 141 if group.firstPeerIdx == wfr.PartitionSize() { 142 p.allRowsProcessed = true 143 } 144 } 145 return nil 146 } 147 148 // Update should be called after a window function has been computed over all 149 // rows in wfr.CurRowPeerGroupNum peer group. If not all rows have been already 150 // processed, it computes the next peer group. It returns any error if it 151 // occurs. 152 func (p *PeerGroupsIndicesHelper) Update(wfr *WindowFrameRun) error { 153 if p.allPeerGroupsSkipped { 154 // No peer groups to process. 155 return nil 156 } 157 158 // nextPeerGroupStartIdx is the index of the first row that we haven't 159 // computed peer group for. 160 lastPeerGroup := p.groups.GetLast().(*peerGroup) 161 nextPeerGroupStartIdx := lastPeerGroup.firstPeerIdx + lastPeerGroup.rowCount 162 163 if (wfr.Frame == nil || wfr.Frame.Mode == ROWS || wfr.Frame.Mode == RANGE) || 164 (wfr.Frame.Bounds.StartBound.BoundType == OffsetPreceding && wfr.CurRowPeerGroupNum-p.headPeerGroupNum > int(MustBeDInt(wfr.StartBoundOffset)) || 165 wfr.Frame.Bounds.StartBound.BoundType == CurrentRow || 166 (wfr.Frame.Bounds.StartBound.BoundType == OffsetFollowing && p.headPeerGroupNum-wfr.CurRowPeerGroupNum > int(MustBeDInt(wfr.StartBoundOffset)))) { 167 // With default frame, ROWS or RANGE mode, we want to "discard" the only 168 // peer group that we're storing information about. In GROUPS mode, with 169 // start bound of type: 170 // - OFFSET_PRECEDING we want to start discarding the "earliest" peer group 171 // only when the number of current row's peer group differs from the 172 // number of the earliest one by more than offset 173 // - CURRENT_ROW we want to discard the earliest peer group 174 // - OFFSET_FOLLOWING we want to start discarding the "earliest" peer group 175 // only when the number of current row's peer group differs from the 176 // number of the earliest one by more than offset 177 p.groups.RemoveFirst() 178 p.headPeerGroupNum++ 179 } 180 181 if p.allRowsProcessed { 182 // No more peer groups to process. 183 return nil 184 } 185 186 // Compute the next peer group that is just entering the frame. 187 peerGroup := &peerGroup{firstPeerIdx: nextPeerGroupStartIdx, rowCount: 1} 188 p.groups.AddLast(peerGroup) 189 for ; peerGroup.firstPeerIdx+peerGroup.rowCount < wfr.PartitionSize(); peerGroup.rowCount++ { 190 idx := peerGroup.firstPeerIdx + peerGroup.rowCount 191 if sameGroup, err := p.peerGrouper.InSameGroup(idx-1, idx); err != nil { 192 return err 193 } else if !sameGroup { 194 break 195 } 196 } 197 if peerGroup.firstPeerIdx+peerGroup.rowCount == wfr.PartitionSize() { 198 p.allRowsProcessed = true 199 } 200 return nil 201 } 202 203 // GetFirstPeerIdx returns index of the first peer within peer group of number 204 // peerGroupNum (counting from 0). 205 func (p *PeerGroupsIndicesHelper) GetFirstPeerIdx(peerGroupNum int) int { 206 posInBuffer := peerGroupNum - p.headPeerGroupNum 207 if posInBuffer < 0 || p.groups.Len() < posInBuffer { 208 panic("peerGroupNum out of bounds") 209 } 210 return p.groups.Get(posInBuffer).(*peerGroup).firstPeerIdx 211 } 212 213 // GetRowCount returns the number of rows within peer group of number 214 // peerGroupNum (counting from 0). 215 func (p *PeerGroupsIndicesHelper) GetRowCount(peerGroupNum int) int { 216 posInBuffer := peerGroupNum - p.headPeerGroupNum 217 if posInBuffer < 0 || p.groups.Len() < posInBuffer { 218 panic("peerGroupNum out of bounds") 219 } 220 return p.groups.Get(posInBuffer).(*peerGroup).rowCount 221 } 222 223 // GetLastPeerGroupNum returns the number of the last peer group in the queue. 224 func (p *PeerGroupsIndicesHelper) GetLastPeerGroupNum() int { 225 if p.groups.Len() == 0 { 226 panic("GetLastPeerGroupNum on empty RingBuffer") 227 } 228 return p.headPeerGroupNum + p.groups.Len() - 1 229 }