github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/props/col_stats_map.go

github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/props/col_stats_map.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package props
    12  
    13  import (
    14  	"github.com/cockroachdb/cockroach/pkg/sql/opt"
    15  	"github.com/cockroachdb/errors"
    16  )
    17  
    18  const (
    19  	// initialColStatsCap is the initial number of column statistics that can be
    20  	// stored in a ColStatsMap without triggering allocations.
    21  	initialColStatsCap = 3
    22  )
    23  
    24  type prefixID uint32
    25  
    26  type colStatKey struct {
    27  	prefix prefixID
    28  	id     opt.ColumnID
    29  }
    30  
    31  type colStatVal struct {
    32  	prefix prefixID
    33  	pos    int32
    34  }
    35  
// ColStatsMap stores a set of column statistics, each of which is keyed by the
// set of columns over which that statistic is defined. Statistics can be added,
// removed, and efficiently accessed (by opt.ColSet key or by ordinal position)
// and enumerated.
//
// Since most expressions have just a few column statistics attached to them,
// ColStatsMap optimizes for this case by storing the first initialColStatsCap
// (3) column statistics inline. Additional column statistics trigger the
// creation of a slice to store them, as well as a lookup index for efficient
// lookup by opt.ColSet.
//
// Because opt.ColSet contains a pointer, it is not useful as a map key for fast
// statistic lookup. So instead of directly using opt.ColSet as a key in a Go
// map, ColStatsMap uses a prefix tree index. Each opt.ColSet key is treated as
// a string of ascending opt.ColumnID values that are each hashed by its own
// value plus a prefix id that uniquely identifies the set of smaller values.
// For example, if an opt.ColSet contains (2, 3, 6), then its index looks like:
//
//   (prefix: 0, id: 2)           => (prefix: 1, pos: -1)
//    └── (prefix: 1, id: 3)      => (prefix: 2, pos: -1)
//         └── (prefix: 2, id: 6) => (prefix: 3, pos: 0)
//
// Where pos is the ordinal position of the statistic in ColStatsMap, and pos=-1
// signifies that there is not yet any statistic for that column set. If an
// additional opt.ColSet containing (2, 4) is added to the index, then it shares
// the initial lookup node, but then diverges:
//
//   (prefix: 0, id: 2)           => (prefix: 1, pos: -1)
//    ├── (prefix: 1, id: 3)      => (prefix: 2, pos: -1)
//    │    └── (prefix: 2, id: 6) => (prefix: 3, pos: 0)
//    └── (prefix: 1, id: 4)      => (prefix: 4, pos: 1)
//
// This algorithm is implemented by a single Go map whose keys (colStatKey) and
// values (colStatVal) are small fixed-size structs. It requires O(N) map
// accesses to add or find a column statistic, where N is the number of values
// in the column set key.
type ColStatsMap struct {
	// initial is a small list of inlined column statistics. No allocations are
	// made by ColStatsMap if all column statistics fit here. Only the first
	// min(count, initialColStatsCap) slots hold live statistics.
	initial [initialColStatsCap]ColumnStatistic

	// other contains spillover column statistics that don't fit into the initial
	// field. If this is used, then the index field will be maintained as well.
	// The statistic at ordinal position p >= initialColStatsCap is stored at
	// other[p-initialColStatsCap].
	other []ColumnStatistic

	// index implements a prefix tree for fast lookup when there are many stats
	// in the ColStatsMap. It is only maintained when there are more column
	// statistics than can fit into the initial field; otherwise lookups scan
	// the initial field directly.
	index map[colStatKey]colStatVal

	// count is the number of column statistics in the ColStatsMap.
	count int

	// unique is an increasing counter that's used to generate a unique id that
	// represents a set of opt.ColumnID values that form a prefix in the tree.
	// It is incremented before each use, so generated ids start at 1 and never
	// collide with the root prefix 0.
	unique prefixID
}
    91  
// Count returns the number of column statistics in the map. Valid ordinal
// positions for Get are 0 through Count()-1.
func (m *ColStatsMap) Count() int {
	return m.count
}
    96  
    97  // Get returns the nth statistic in the map, by its ordinal position. This
    98  // position is stable across calls to Get or Add (but not RemoveIntersecting).
    99  // NOTE: The returned *ColumnStatistic is only valid until this ColStatsMap is
   100  //       updated via a call to Add() or RemoveIntersecting(). At that point,
   101  //       the address of the statistic may have changed, so it must be fetched
   102  //       again using another call to Get() or Lookup().
   103  func (m *ColStatsMap) Get(nth int) *ColumnStatistic {
   104  	if nth < initialColStatsCap {
   105  		return &m.initial[nth]
   106  	}
   107  	return &m.other[nth-initialColStatsCap]
   108  }
   109  
   110  // Lookup returns the column statistic indexed by the given column set. If no
   111  // such statistic exists in the map, then ok=false.
   112  // NOTE: The returned *ColumnStatistic is only valid until this ColStatsMap is
   113  //       updated via a call to Add() or RemoveIntersecting(). At that point,
   114  //       the address of the statistic may have changed, so it must be fetched
   115  //       again using another call to Lookup() or Get().
   116  func (m *ColStatsMap) Lookup(cols opt.ColSet) (colStat *ColumnStatistic, ok bool) {
   117  	// Scan the inlined statistics if there are only a few statistics in the map.
   118  	if m.count <= initialColStatsCap {
   119  		for i := 0; i < m.count; i++ {
   120  			colStat = &m.initial[i]
   121  			if colStat.Cols.Equals(cols) {
   122  				return colStat, true
   123  			}
   124  		}
   125  		return nil, false
   126  	}
   127  
   128  	// Use the prefix tree index to look up the column statistic.
   129  	val := colStatVal{prefix: 0, pos: -1}
   130  	curr := opt.ColumnID(0)
   131  	for {
   132  		curr, ok = cols.Next(curr + 1)
   133  		if !ok {
   134  			// No more columns in set, so consult last value to determine whether
   135  			// a match was located.
   136  			if val.pos == -1 {
   137  				// No stat exists for this column set.
   138  				return nil, false
   139  			}
   140  
   141  			// A stat exists, so return it.
   142  			return m.Get(int(val.pos)), true
   143  		}
   144  
   145  		// Fetch index entry for next prefix+col combo.
   146  		key := colStatKey{prefix: val.prefix, id: curr}
   147  		val, ok = m.index[key]
   148  		if !ok {
   149  			// No entry exists, so lookup fails.
   150  			return nil, false
   151  		}
   152  	}
   153  }
   154  
   155  // Add ensures that a ColumnStatistic over the given columns is in the map. If
   156  // it does not yet exist in the map, then Add adds a new blank ColumnStatistic
   157  // and returns it, along with added=true. Otherwise, Add returns the existing
   158  // ColumnStatistic with added=false.
   159  // NOTE: The returned *ColumnStatistic is only valid until this ColStatsMap is
   160  //       updated via another call to Add() or RemoveIntersecting(). At that
   161  //       point, the address of the statistic may have changed, so it must be
   162  //       fetched again using Lookup() or Get().
   163  func (m *ColStatsMap) Add(cols opt.ColSet) (_ *ColumnStatistic, added bool) {
   164  	// Only add column set if it is not already present in the map.
   165  	colStat, ok := m.Lookup(cols)
   166  	if ok {
   167  		return colStat, false
   168  	}
   169  
   170  	if cols.Empty() {
   171  		panic(errors.AssertionFailedf("stats cols should never be empty"))
   172  	}
   173  
   174  	// Fast path for case where there are only a few stats in the map.
   175  	if m.count < initialColStatsCap {
   176  		colStat = &m.initial[m.count]
   177  		*colStat = ColumnStatistic{Cols: cols}
   178  		m.count++
   179  		return colStat, true
   180  	}
   181  
   182  	// Fall back on map with arbitrary number of stats.
   183  	if m.index == nil {
   184  		m.other = make([]ColumnStatistic, 0, initialColStatsCap)
   185  
   186  		// Add the initial stats to the index.
   187  		for i := range m.initial {
   188  			m.addToIndex(m.initial[i].Cols, i)
   189  		}
   190  	}
   191  	m.other = append(m.other, ColumnStatistic{Cols: cols})
   192  	colStat = &m.other[m.count-initialColStatsCap]
   193  	m.addToIndex(cols, m.count)
   194  	m.count++
   195  	return colStat, true
   196  }
   197  
   198  // RemoveIntersecting scans the set of column statistics in the ColStatsMap and
   199  // removes any that are defined over any of the columns in the given set. For
   200  // example, if the map contains stats for (1), (1,2), and (3), then removing
   201  // (1) would remove the (1) and (1,2) stats from the map.
   202  func (m *ColStatsMap) RemoveIntersecting(cols opt.ColSet) {
   203  	// Iterate over the map, removing any stats that intersect.
   204  	n := 0
   205  	for i := 0; i < m.count; i++ {
   206  		colStat := m.Get(i)
   207  		if colStat.Cols.Intersects(cols) {
   208  			continue
   209  		}
   210  
   211  		if n < i {
   212  			*m.Get(n) = *colStat
   213  		}
   214  		n++
   215  	}
   216  
   217  	// Update state to reflect any items that were removed.
   218  	if n < m.count {
   219  		m.count = n
   220  		m.index = nil
   221  		if n <= initialColStatsCap {
   222  			m.other = m.other[:0]
   223  		} else {
   224  			m.other = m.other[:n-initialColStatsCap]
   225  			m.rebuildIndex()
   226  		}
   227  	}
   228  }
   229  
   230  // Clear empties the map of all column statistics.
   231  func (m *ColStatsMap) Clear() {
   232  	m.count = 0
   233  	m.other = m.other[:0]
   234  	m.index = nil
   235  	m.unique = 0
   236  }
   237  
   238  // addToIndex adds the column statistic at the given ordinal position to the
   239  // prefix tree index. The caller must have verified that it does not yet exist
   240  // in the index.
   241  func (m *ColStatsMap) addToIndex(cols opt.ColSet, pos int) {
   242  	if m.index == nil {
   243  		m.index = make(map[colStatKey]colStatVal)
   244  	}
   245  
   246  	prefix := prefixID(0)
   247  	prev := opt.ColumnID(0)
   248  	curr, _ := cols.Next(prev)
   249  	for {
   250  		key := colStatKey{prefix: prefix, id: curr}
   251  		val, ok := m.index[key]
   252  		if ok {
   253  			// Index entry exists, so get its prefix value.
   254  			prefix = val.prefix
   255  		} else {
   256  			// No index entry exists, so create one now with a new prefix value.
   257  			// Initialize the "nth" field to -1, indicating that there is not yet
   258  			// a ColumnStatistic for the prefix of columns.
   259  			m.unique++
   260  			prefix = m.unique
   261  			m.index[key] = colStatVal{prefix: prefix, pos: -1}
   262  		}
   263  
   264  		// Get the next column from the set.
   265  		prev = curr
   266  		curr, ok = cols.Next(curr + 1)
   267  		if !ok {
   268  			// Done adding columns, so set the "nth" field to the ordinal position
   269  			// of the ColumnStatistic in the map.
   270  			m.index[key] = colStatVal{prefix: prefix, pos: int32(pos)}
   271  			break
   272  		}
   273  	}
   274  }
   275  
   276  // rebuildIndex creates the prefix tree index from scratch.
   277  func (m *ColStatsMap) rebuildIndex() {
   278  	m.index = nil
   279  	for i := 0; i < m.Count(); i++ {
   280  		m.addToIndex(m.Get(i).Cols, i)
   281  	}
   282  }
   283  
   284  // CopyFrom sets this map to a deep copy of another map, which can be modified
   285  // independently.
   286  func (m *ColStatsMap) CopyFrom(other *ColStatsMap) {
   287  	m.initial = other.initial
   288  	m.other = append([]ColumnStatistic(nil), other.other...)
   289  	m.count = other.count
   290  	m.unique = other.unique
   291  
   292  	m.index = nil
   293  	if other.index != nil {
   294  		m.index = make(map[colStatKey]colStatVal, len(other.index))
   295  		for k, v := range other.index {
   296  			m.index[k] = v
   297  		}
   298  	}
   299  }