github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/schema/col_coll.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package schema
    16  
    17  import (
    18  	"errors"
    19  	"sort"
    20  	"strings"
    21  )
    22  
    23  // ErrColTagCollision is an error that is returned when two columns within a ColCollection have the same tag
    24  // but a different name or type
    25  var ErrColTagCollision = errors.New("two different columns with the same tag")
    26  
    27  // ErrColNotFound is an error that is returned when attempting an operation on a column that does not exist
    28  var ErrColNotFound = errors.New("column not found")
    29  
    30  // ErrColNameCollision is an error that is returned when two columns within a ColCollection have the same name but a
    31  // different type or tag
    32  var ErrColNameCollision = errors.New("two different columns with the same name exist")
    33  
    34  // ErrNoPrimaryKeyColumns is an error that is returned when no primary key columns are found
    35  var ErrNoPrimaryKeyColumns = errors.New("no primary key columns")
    36  
    37  var ErrNonAutoIncType = errors.New("column type cannot be auto incremented")
    38  
    39  var EmptyColColl = &ColCollection{
    40  	cols:           []Column{},
    41  	Tags:           []uint64{},
    42  	SortedTags:     []uint64{},
    43  	TagToCol:       map[uint64]Column{},
    44  	NameToCol:      map[string]Column{},
    45  	LowerNameToCol: map[string]Column{},
    46  	TagToIdx:       map[uint64]int{},
    47  }
    48  
    49  // ColCollection is an ordered collection of columns plus lookup tables keyed by tag and by name. As a stand-alone
    50  // collection, all columns in the collection must have unique tags. To be instantiated as a schema for writing to
    51  // the database, names must also be unique. See schema.ValidateForInsert for details.
    52  type ColCollection struct {
    53  	cols []Column // the columns, in the order they were supplied to the constructor
    54  	// virtualColumns stores the indexes (positions in cols) of any virtual columns in the collection
    55  	virtualColumns []int
    56  	// storedIndexes stores the indexes (positions in cols) of the stored, i.e. non-virtual, columns
    57  	storedIndexes []int
    58  	// Tags is a list of all the tags in the ColCollection in their original order.
    59  	Tags []uint64
    60  	// SortedTags is a list of all the tags in the ColCollection in ascending order.
    61  	SortedTags []uint64
    62  	// TagToCol is a map of tag to column
    63  	TagToCol map[uint64]Column
    64  	// NameToCol is a map from exact (case-sensitive) name to column
    65  	NameToCol map[string]Column
    66  	// LowerNameToCol is a map from lower-cased name to column, used for case-insensitive lookups
    67  	LowerNameToCol map[string]Column
    68  	// TagToIdx is a map from a tag to the column's index (position in cols)
    69  	TagToIdx map[uint64]int
    70  	// tagToStorageIndex is a map from a tag to the column's position counted over stored (non-virtual) columns only
    71  	tagToStorageIndex map[uint64]int
    72  }
    73  
    74  // NewColCollection creates a new collection from a list of columns. If any columns have the same tag, by-tag lookups in
    75  // this collection will not function correctly. If any columns have the same name, by-name lookups from this collection
    76  // will not function correctly. If any columns have the same case-insensitive name, case-insensitive lookups will be
    77  // unable to return the correct column in all cases.
    78  // For this collection to be used as a Dolt schema, it must pass schema.ValidateForInsert.
    79  func NewColCollection(cols ...Column) *ColCollection {
    80  	var tags []uint64
    81  	var sortedTags []uint64
    82  
    83  	tagToCol := make(map[uint64]Column, len(cols))
    84  	nameToCol := make(map[string]Column, len(cols))
    85  	lowerNameToCol := make(map[string]Column, len(cols))
    86  	tagToIdx := make(map[uint64]int, len(cols))
    87  	tagToStorageIndex := make(map[uint64]int, len(cols))
    88  	var virtualColumns []int
    89  
    90  	var columns []Column
    91  	var storedIndexes []int
    92  	storageIdx := 0
    93  	for i, col := range cols {
    94  		// If multiple columns have the same tag, the last one is used for tag lookups.
    95  		// Columns must have unique tags to pass schema.ValidateForInsert.
    96  		columns = append(columns, col)
    97  		tagToCol[col.Tag] = col
    98  		tagToIdx[col.Tag] = i
    99  		tags = append(tags, col.Tag)
   100  		sortedTags = append(sortedTags, col.Tag)
   101  		nameToCol[col.Name] = cols[i]
   102  
   103  		// If multiple columns have the same lower case name, the first one is used for case-insensitive matching.
   104  		// Column names must all be case-insensitive different to pass schema.ValidateForInsert.
   105  		lowerCaseName := strings.ToLower(col.Name)
   106  		if _, ok := lowerNameToCol[lowerCaseName]; !ok {
   107  			lowerNameToCol[lowerCaseName] = cols[i]
   108  		}
   109  
   110  		if col.Virtual {
   111  			virtualColumns = append(virtualColumns, i)
   112  		} else {
   113  			storedIndexes = append(storedIndexes, i)
   114  			tagToStorageIndex[col.Tag] = storageIdx
   115  			storageIdx++
   116  		}
   117  	}
   118  
   119  	sort.Slice(sortedTags, func(i, j int) bool { return sortedTags[i] < sortedTags[j] })
   120  
   121  	return &ColCollection{
   122  		cols:              columns,
   123  		virtualColumns:    virtualColumns,
   124  		storedIndexes:     storedIndexes,
   125  		tagToStorageIndex: tagToStorageIndex,
   126  		Tags:              tags,
   127  		SortedTags:        sortedTags,
   128  		TagToCol:          tagToCol,
   129  		NameToCol:         nameToCol,
   130  		LowerNameToCol:    lowerNameToCol,
   131  		TagToIdx:          tagToIdx,
   132  	}
   133  }
   134  
   135  // GetColumns returns the underlying list of columns. The list returned is a copy.
   136  func (cc *ColCollection) GetColumns() []Column {
   137  	colsCopy := make([]Column, len(cc.cols))
   138  	copy(colsCopy, cc.cols)
   139  	return colsCopy
   140  }
   141  
   142  // GetColumnNames returns a list of names of the columns.
   143  func (cc *ColCollection) GetColumnNames() []string {
   144  	names := make([]string, len(cc.cols))
   145  	for i, col := range cc.cols {
   146  		names[i] = col.Name
   147  	}
   148  	return names
   149  }
   150  
   151  // AppendColl returns a new collection with the additional ColCollection's columns appended
   152  func (cc *ColCollection) AppendColl(colColl *ColCollection) *ColCollection {
   153  	return cc.Append(colColl.cols...)
   154  }
   155  
   156  // Append returns a new collection with the additional columns appended
   157  func (cc *ColCollection) Append(cols ...Column) *ColCollection {
   158  	allCols := make([]Column, 0, len(cols)+len(cc.cols))
   159  	allCols = append(allCols, cc.cols...)
   160  	allCols = append(allCols, cols...)
   161  
   162  	return NewColCollection(allCols...)
   163  }
   164  
   165  // IndexOf returns the index of the column with the name given (case-insensitive) or -1 if it's not found
   166  func (cc *ColCollection) IndexOf(colName string) int {
   167  	idx := -1
   168  
   169  	var i = 0
   170  	_ = cc.Iter(func(tag uint64, col Column) (stop bool, err error) {
   171  		defer func() {
   172  			i++
   173  		}()
   174  		if strings.ToLower(col.Name) == strings.ToLower(colName) {
   175  			idx = i
   176  			stop = true
   177  		}
   178  		return
   179  	})
   180  
   181  	return idx
   182  }
   183  
   184  // Iter iterates over all the columns in the supplied ordering
   185  func (cc *ColCollection) Iter(cb func(tag uint64, col Column) (stop bool, err error)) error {
   186  	for _, col := range cc.cols {
   187  		if stop, err := cb(col.Tag, col); err != nil {
   188  			return err
   189  		} else if stop {
   190  			break
   191  		}
   192  	}
   193  
   194  	return nil
   195  }
   196  
   197  // IterInSortedOrder iterates over all the columns from lowest tag to highest tag.
   198  func (cc *ColCollection) IterInSortedOrder(cb func(tag uint64, col Column) (stop bool)) {
   199  	for _, tag := range cc.SortedTags {
   200  		val := cc.TagToCol[tag]
   201  		if stop := cb(tag, val); stop {
   202  			break
   203  		}
   204  	}
   205  }
   206  
   207  // GetByName takes the name of a column and returns the column and true if found. Otherwise InvalidCol and false are
   208  // returned.
   209  func (cc *ColCollection) GetByName(name string) (Column, bool) {
   210  	val, ok := cc.NameToCol[name]
   211  
   212  	if ok {
   213  		return val, true
   214  	}
   215  
   216  	return InvalidCol, false
   217  }
   218  
   219  // GetByNameCaseInsensitive takes the name of a column and returns the column and true if there is a column with that
   220  // name ignoring case. Otherwise InvalidCol and false are returned. If multiple columns have the same case-insensitive
   221  // name, the first declared one is returned.
   222  func (cc *ColCollection) GetByNameCaseInsensitive(name string) (Column, bool) {
   223  	val, ok := cc.LowerNameToCol[strings.ToLower(name)]
   224  
   225  	if ok {
   226  		return val, true
   227  	}
   228  
   229  	return InvalidCol, false
   230  }
   231  
   232  // GetByTag takes a tag and returns the corresponding column and true if found, otherwise InvalidCol and false are
   233  // returned
   234  func (cc *ColCollection) GetByTag(tag uint64) (Column, bool) {
   235  	val, ok := cc.TagToCol[tag]
   236  
   237  	if ok {
   238  		return val, true
   239  	}
   240  
   241  	return InvalidCol, false
   242  }
   243  
   244  // GetByIndex returns the Nth column in the collection
   245  func (cc *ColCollection) GetByIndex(idx int) Column {
   246  	return cc.cols[idx]
   247  }
   248  
   249  // GetByStoredIndex returns the Nth stored column (omitting virtual columns from index calculation)
   250  func (cc *ColCollection) GetByStoredIndex(idx int) Column {
   251  	return cc.cols[cc.storedIndexes[idx]]
   252  }
   253  
   254  // StoredIndexByTag returns the storage index of the column with the given tag, ignoring virtual columns
   255  func (cc *ColCollection) StoredIndexByTag(tag uint64) (int, bool) {
   256  	idx, ok := cc.tagToStorageIndex[tag]
   257  	return idx, ok
   258  }
   259  
   260  // Size returns the number of columns in the collection.
   261  func (cc *ColCollection) Size() int {
   262  	return len(cc.cols)
   263  }
   264  
   265  // StoredSize returns the number of non-virtual columns in the collection
   266  func (cc *ColCollection) StoredSize() int {
   267  	return len(cc.storedIndexes)
   268  }
   269  
   270  // Contains returns whether this column collection contains a column with the name given, case insensitive
   271  func (cc *ColCollection) Contains(name string) bool {
   272  	_, ok := cc.GetByNameCaseInsensitive(name)
   273  	return ok
   274  }
   275  
   276  // ColCollsAreEqual determines whether two ColCollections are equal.
   277  func ColCollsAreEqual(cc1, cc2 *ColCollection) bool {
   278  	if cc1.Size() != cc2.Size() {
   279  		return false
   280  	}
   281  	// Pks Cols need to be in the same order and equivalent.
   282  	for i := 0; i < cc1.Size(); i++ {
   283  		// Test that the columns are identical, but don't worry about tags matching, since
   284  		// different tags could be generated depending on how the schemas were created.
   285  		if !cc1.cols[i].EqualsWithoutTag(cc2.cols[i]) {
   286  			return false
   287  		}
   288  	}
   289  	return true
   290  }
   291  
   292  // MapColCollection applies a function to each column in a ColCollection and creates a new ColCollection from the results.
   293  func MapColCollection(cc *ColCollection, cb func(col Column) Column) *ColCollection {
   294  	mapped := make([]Column, cc.Size())
   295  	for i, c := range cc.cols {
   296  		mapped[i] = cb(c)
   297  	}
   298  	return NewColCollection(mapped...)
   299  }
   300  
   301  // FilterColCollection applies a boolean function to column in a ColCollection, it creates a new ColCollection from the
   302  // set of columns for which the function returned true.
   303  func FilterColCollection(cc *ColCollection, cb func(col Column) bool) *ColCollection {
   304  	filtered := make([]Column, 0, cc.Size())
   305  	for _, c := range cc.cols {
   306  		if cb(c) {
   307  			filtered = append(filtered, c)
   308  		}
   309  	}
   310  	return NewColCollection(filtered...)
   311  }