github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/props/col_stats_map.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package props 12 13 import ( 14 "github.com/cockroachdb/cockroach/pkg/sql/opt" 15 "github.com/cockroachdb/errors" 16 ) 17 18 const ( 19 // initialColStatsCap is the initial number of column statistics that can be 20 // stored in a ColStatsMap without triggering allocations. 21 initialColStatsCap = 3 22 ) 23 24 type prefixID uint32 25 26 type colStatKey struct { 27 prefix prefixID 28 id opt.ColumnID 29 } 30 31 type colStatVal struct { 32 prefix prefixID 33 pos int32 34 } 35 36 // ColStatsMap stores a set of column statistics, each of which is keyed by the 37 // set of columns over which that statistic is defined. Statistics can be added, 38 // removed, and efficiently accessed (by opt.ColumnSet key or by ordinal 39 // position) and enumerated. 40 // 41 // Since most expressions have just a few column statistics attached to them, 42 // ColStatsMap optimizes for this case by storing the first 3 column statistics 43 // inline. Additional column statistics trigger the creation of a slice to store 44 // them, as well as a lookup index for efficient lookup by opt.ColSet. 45 // 46 // Because opt.ColSet contains a pointer, it is not useful as a map key for fast 47 // statistic lookup. So instead of directly using opt.ColSet as a key in a Go 48 // map, ColStatsMap uses a prefix tree index. Each opt.ColSet key is treated as 49 // a string of ascending opt.ColumnID values that are each hashed by its own 50 // value plus a prefix id that uniquely identifies the set of smaller values. 51 // For example, if an opt.ColSet contains (2, 3, 6), then its index looks like: 52 // 53 // (prefix: 0, id: 2) => (prefix: 1, pos: -1) 54 // └── (prefix: 1, id: 3) => (prefix: 2, pos: -1) 55 // └── (prefix: 2, id: 6) => (prefix: 3, pos: 0) 56 // 57 // Where pos is the ordinal position of the statistic in ColStatsMap, and pos=-1 58 // signifies that there is not yet any statistic for that column set. If an 59 // additional opt.ColSet containing (2, 4) is added to the index, then it shares 60 // the initial lookup node, but then diverges: 61 // 62 // (prefix: 0, id: 2) => (prefix: 1, pos: -1) 63 // ├── (prefix: 1, id: 3) => (prefix: 2, pos: -1) 64 // │ └── (prefix: 2, id: 6) => (prefix: 3, pos: 0) 65 // └── (prefix: 1, id: 4) => (prefix: 4, pos: 1) 66 // 67 // This algorithm can be implemented by a single Go map that uses efficient 68 // int64 keys and values. It requires O(N) accesses to add and find a column 69 // statistic, where N is the number of values in the column set key. 70 type ColStatsMap struct { 71 // initial is a small list of inlined column statistics. No allocations are 72 // made by ColStatsMap if all column statistics fit here. 73 initial [initialColStatsCap]ColumnStatistic 74 75 // other contains spillover column statistics that don't fit into the initial 76 // field. If this is used, then the index field will be maintained as well. 77 other []ColumnStatistic 78 79 // index implements a prefix tree for fast lookup when there are many stats 80 // in the ColStatsMap. It is only maintained when there are more column 81 // statistics than can fit into the initial field. 82 index map[colStatKey]colStatVal 83 84 // count is the number of column statistics in the ColStatsMap. 85 count int 86 87 // unique is an increasing counter that's used to generate a unique id that 88 // represents a set of opt.ColumnID values that form a prefix in the tree. 89 unique prefixID 90 } 91 92 // Count returns the number of column statistics in the map. 93 func (m *ColStatsMap) Count() int { 94 return m.count 95 } 96 97 // Get returns the nth statistic in the map, by its ordinal position. This 98 // position is stable across calls to Get or Add (but not RemoveIntersecting). 99 // NOTE: The returned *ColumnStatistic is only valid until this ColStatsMap is 100 // updated via a call to Add() or RemoveIntersecting(). At that point, 101 // the address of the statistic may have changed, so it must be fetched 102 // again using another call to Get() or Lookup(). 103 func (m *ColStatsMap) Get(nth int) *ColumnStatistic { 104 if nth < initialColStatsCap { 105 return &m.initial[nth] 106 } 107 return &m.other[nth-initialColStatsCap] 108 } 109 110 // Lookup returns the column statistic indexed by the given column set. If no 111 // such statistic exists in the map, then ok=false. 112 // NOTE: The returned *ColumnStatistic is only valid until this ColStatsMap is 113 // updated via a call to Add() or RemoveIntersecting(). At that point, 114 // the address of the statistic may have changed, so it must be fetched 115 // again using another call to Lookup() or Get(). 116 func (m *ColStatsMap) Lookup(cols opt.ColSet) (colStat *ColumnStatistic, ok bool) { 117 // Scan the inlined statistics if there are only a few statistics in the map. 118 if m.count <= initialColStatsCap { 119 for i := 0; i < m.count; i++ { 120 colStat = &m.initial[i] 121 if colStat.Cols.Equals(cols) { 122 return colStat, true 123 } 124 } 125 return nil, false 126 } 127 128 // Use the prefix tree index to look up the column statistic. 129 val := colStatVal{prefix: 0, pos: -1} 130 curr := opt.ColumnID(0) 131 for { 132 curr, ok = cols.Next(curr + 1) 133 if !ok { 134 // No more columns in set, so consult last value to determine whether 135 // a match was located. 136 if val.pos == -1 { 137 // No stat exists for this column set. 138 return nil, false 139 } 140 141 // A stat exists, so return it. 142 return m.Get(int(val.pos)), true 143 } 144 145 // Fetch index entry for next prefix+col combo. 146 key := colStatKey{prefix: val.prefix, id: curr} 147 val, ok = m.index[key] 148 if !ok { 149 // No entry exists, so lookup fails. 150 return nil, false 151 } 152 } 153 } 154 155 // Add ensures that a ColumnStatistic over the given columns is in the map. If 156 // it does not yet exist in the map, then Add adds a new blank ColumnStatistic 157 // and returns it, along with added=true. Otherwise, Add returns the existing 158 // ColumnStatistic with added=false. 159 // NOTE: The returned *ColumnStatistic is only valid until this ColStatsMap is 160 // updated via another call to Add() or RemoveIntersecting(). At that 161 // point, the address of the statistic may have changed, so it must be 162 // fetched again using Lookup() or Get(). 163 func (m *ColStatsMap) Add(cols opt.ColSet) (_ *ColumnStatistic, added bool) { 164 // Only add column set if it is not already present in the map. 165 colStat, ok := m.Lookup(cols) 166 if ok { 167 return colStat, false 168 } 169 170 if cols.Empty() { 171 panic(errors.AssertionFailedf("stats cols should never be empty")) 172 } 173 174 // Fast path for case where there are only a few stats in the map. 175 if m.count < initialColStatsCap { 176 colStat = &m.initial[m.count] 177 *colStat = ColumnStatistic{Cols: cols} 178 m.count++ 179 return colStat, true 180 } 181 182 // Fall back on map with arbitrary number of stats. 183 if m.index == nil { 184 m.other = make([]ColumnStatistic, 0, initialColStatsCap) 185 186 // Add the initial stats to the index. 187 for i := range m.initial { 188 m.addToIndex(m.initial[i].Cols, i) 189 } 190 } 191 m.other = append(m.other, ColumnStatistic{Cols: cols}) 192 colStat = &m.other[m.count-initialColStatsCap] 193 m.addToIndex(cols, m.count) 194 m.count++ 195 return colStat, true 196 } 197 198 // RemoveIntersecting scans the set of column statistics in the ColStatsMap and 199 // removes any that are defined over any of the columns in the given set. For 200 // example, if the map contains stats for (1), (1,2), and (3), then removing 201 // (1) would remove the (1) and (1,2) stats from the map. 202 func (m *ColStatsMap) RemoveIntersecting(cols opt.ColSet) { 203 // Iterate over the map, removing any stats that intersect. 204 n := 0 205 for i := 0; i < m.count; i++ { 206 colStat := m.Get(i) 207 if colStat.Cols.Intersects(cols) { 208 continue 209 } 210 211 if n < i { 212 *m.Get(n) = *colStat 213 } 214 n++ 215 } 216 217 // Update state to reflect any items that were removed. 218 if n < m.count { 219 m.count = n 220 m.index = nil 221 if n <= initialColStatsCap { 222 m.other = m.other[:0] 223 } else { 224 m.other = m.other[:n-initialColStatsCap] 225 m.rebuildIndex() 226 } 227 } 228 } 229 230 // Clear empties the map of all column statistics. 231 func (m *ColStatsMap) Clear() { 232 m.count = 0 233 m.other = m.other[:0] 234 m.index = nil 235 m.unique = 0 236 } 237 238 // addToIndex adds the column statistic at the given ordinal position to the 239 // prefix tree index. The caller must have verified that it does not yet exist 240 // in the index. 241 func (m *ColStatsMap) addToIndex(cols opt.ColSet, pos int) { 242 if m.index == nil { 243 m.index = make(map[colStatKey]colStatVal) 244 } 245 246 prefix := prefixID(0) 247 prev := opt.ColumnID(0) 248 curr, _ := cols.Next(prev) 249 for { 250 key := colStatKey{prefix: prefix, id: curr} 251 val, ok := m.index[key] 252 if ok { 253 // Index entry exists, so get its prefix value. 254 prefix = val.prefix 255 } else { 256 // No index entry exists, so create one now with a new prefix value. 257 // Initialize the "nth" field to -1, indicating that there is not yet 258 // a ColumnStatistic for the prefix of columns. 259 m.unique++ 260 prefix = m.unique 261 m.index[key] = colStatVal{prefix: prefix, pos: -1} 262 } 263 264 // Get the next column from the set. 265 prev = curr 266 curr, ok = cols.Next(curr + 1) 267 if !ok { 268 // Done adding columns, so set the "nth" field to the ordinal position 269 // of the ColumnStatistic in the map. 270 m.index[key] = colStatVal{prefix: prefix, pos: int32(pos)} 271 break 272 } 273 } 274 } 275 276 // rebuildIndex creates the prefix tree index from scratch. 277 func (m *ColStatsMap) rebuildIndex() { 278 m.index = nil 279 for i := 0; i < m.Count(); i++ { 280 m.addToIndex(m.Get(i).Cols, i) 281 } 282 } 283 284 // CopyFrom sets this map to a deep copy of another map, which can be modified 285 // independently. 286 func (m *ColStatsMap) CopyFrom(other *ColStatsMap) { 287 m.initial = other.initial 288 m.other = append([]ColumnStatistic(nil), other.other...) 289 m.count = other.count 290 m.unique = other.unique 291 292 m.index = nil 293 if other.index != nil { 294 m.index = make(map[colStatKey]colStatVal, len(other.index)) 295 for k, v := range other.index { 296 m.index[k] = v 297 } 298 } 299 }