github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/schema/col_coll.go (about) 1 // Copyright 2019 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package schema 16 17 import ( 18 "errors" 19 "sort" 20 "strings" 21 ) 22 23 // ErrColTagCollision is an error that is returned when two columns within a ColCollection have the same tag 24 // but a different name or type 25 var ErrColTagCollision = errors.New("two different columns with the same tag") 26 27 // ErrColNotFound is an error that is returned when attempting an operation on a column that does not exist 28 var ErrColNotFound = errors.New("column not found") 29 30 // ErrColNameCollision is an error that is returned when two columns within a ColCollection have the same name but a 31 // different type or tag 32 var ErrColNameCollision = errors.New("two different columns with the same name exist") 33 34 // ErrNoPrimaryKeyColumns is an error that is returned when no primary key columns are found 35 var ErrNoPrimaryKeyColumns = errors.New("no primary key columns") 36 37 var ErrNonAutoIncType = errors.New("column type cannot be auto incremented") 38 39 var EmptyColColl = &ColCollection{ 40 cols: []Column{}, 41 Tags: []uint64{}, 42 SortedTags: []uint64{}, 43 TagToCol: map[uint64]Column{}, 44 NameToCol: map[string]Column{}, 45 LowerNameToCol: map[string]Column{}, 46 TagToIdx: map[uint64]int{}, 47 } 48 49 // ColCollection is a collection of columns. As a stand-alone collection, all columns in the collection must have unique 50 // tags. To be instantiated as a schema for writing to the database, names must also be unique. 51 // See schema.ValidateForInsert for details. 52 type ColCollection struct { 53 cols []Column 54 // virtualColumns stores the indexes of any virtual columns in the collection 55 virtualColumns []int 56 // storedIndexes stores the indexes of the stored columns in the collection 57 storedIndexes []int 58 // Tags is a list of all the tags in the ColCollection in their original order. 59 Tags []uint64 60 // SortedTags is a list of all the tags in the ColCollection in sorted order. 61 SortedTags []uint64 62 // TagToCol is a map of tag to column 63 TagToCol map[uint64]Column 64 // NameToCol is a map from name to column 65 NameToCol map[string]Column 66 // LowerNameToCol is a map from lower-cased name to column 67 LowerNameToCol map[string]Column 68 // TagToIdx is a map from a tag to the column index 69 TagToIdx map[uint64]int 70 // tagToStorageIndex is a map from a tag to the physical storage column index 71 tagToStorageIndex map[uint64]int 72 } 73 74 // NewColCollection creates a new collection from a list of columns. If any columns have the same tag, by-tag lookups in 75 // this collection will not function correctly. If any columns have the same name, by-name lookups from this collection 76 // will not function correctly. If any columns have the same case-insensitive name, case-insensitive lookups will be 77 // unable to return the correct column in all cases. 78 // For this collection to be used as a Dolt schema, it must pass schema.ValidateForInsert. 79 func NewColCollection(cols ...Column) *ColCollection { 80 var tags []uint64 81 var sortedTags []uint64 82 83 tagToCol := make(map[uint64]Column, len(cols)) 84 nameToCol := make(map[string]Column, len(cols)) 85 lowerNameToCol := make(map[string]Column, len(cols)) 86 tagToIdx := make(map[uint64]int, len(cols)) 87 tagToStorageIndex := make(map[uint64]int, len(cols)) 88 var virtualColumns []int 89 90 var columns []Column 91 var storedIndexes []int 92 storageIdx := 0 93 for i, col := range cols { 94 // If multiple columns have the same tag, the last one is used for tag lookups. 95 // Columns must have unique tags to pass schema.ValidateForInsert. 96 columns = append(columns, col) 97 tagToCol[col.Tag] = col 98 tagToIdx[col.Tag] = i 99 tags = append(tags, col.Tag) 100 sortedTags = append(sortedTags, col.Tag) 101 nameToCol[col.Name] = cols[i] 102 103 // If multiple columns have the same lower case name, the first one is used for case-insensitive matching. 104 // Column names must all be case-insensitive different to pass schema.ValidateForInsert. 105 lowerCaseName := strings.ToLower(col.Name) 106 if _, ok := lowerNameToCol[lowerCaseName]; !ok { 107 lowerNameToCol[lowerCaseName] = cols[i] 108 } 109 110 if col.Virtual { 111 virtualColumns = append(virtualColumns, i) 112 } else { 113 storedIndexes = append(storedIndexes, i) 114 tagToStorageIndex[col.Tag] = storageIdx 115 storageIdx++ 116 } 117 } 118 119 sort.Slice(sortedTags, func(i, j int) bool { return sortedTags[i] < sortedTags[j] }) 120 121 return &ColCollection{ 122 cols: columns, 123 virtualColumns: virtualColumns, 124 storedIndexes: storedIndexes, 125 tagToStorageIndex: tagToStorageIndex, 126 Tags: tags, 127 SortedTags: sortedTags, 128 TagToCol: tagToCol, 129 NameToCol: nameToCol, 130 LowerNameToCol: lowerNameToCol, 131 TagToIdx: tagToIdx, 132 } 133 } 134 135 // GetColumns returns the underlying list of columns. The list returned is a copy. 136 func (cc *ColCollection) GetColumns() []Column { 137 colsCopy := make([]Column, len(cc.cols)) 138 copy(colsCopy, cc.cols) 139 return colsCopy 140 } 141 142 // GetColumnNames returns a list of names of the columns. 143 func (cc *ColCollection) GetColumnNames() []string { 144 names := make([]string, len(cc.cols)) 145 for i, col := range cc.cols { 146 names[i] = col.Name 147 } 148 return names 149 } 150 151 // AppendColl returns a new collection with the additional ColCollection's columns appended 152 func (cc *ColCollection) AppendColl(colColl *ColCollection) *ColCollection { 153 return cc.Append(colColl.cols...) 154 } 155 156 // Append returns a new collection with the additional columns appended 157 func (cc *ColCollection) Append(cols ...Column) *ColCollection { 158 allCols := make([]Column, 0, len(cols)+len(cc.cols)) 159 allCols = append(allCols, cc.cols...) 160 allCols = append(allCols, cols...) 161 162 return NewColCollection(allCols...) 163 } 164 165 // IndexOf returns the index of the column with the name given (case-insensitive) or -1 if it's not found 166 func (cc *ColCollection) IndexOf(colName string) int { 167 idx := -1 168 169 var i = 0 170 _ = cc.Iter(func(tag uint64, col Column) (stop bool, err error) { 171 defer func() { 172 i++ 173 }() 174 if strings.ToLower(col.Name) == strings.ToLower(colName) { 175 idx = i 176 stop = true 177 } 178 return 179 }) 180 181 return idx 182 } 183 184 // Iter iterates over all the columns in the supplied ordering 185 func (cc *ColCollection) Iter(cb func(tag uint64, col Column) (stop bool, err error)) error { 186 for _, col := range cc.cols { 187 if stop, err := cb(col.Tag, col); err != nil { 188 return err 189 } else if stop { 190 break 191 } 192 } 193 194 return nil 195 } 196 197 // IterInSortedOrder iterates over all the columns from lowest tag to highest tag. 198 func (cc *ColCollection) IterInSortedOrder(cb func(tag uint64, col Column) (stop bool)) { 199 for _, tag := range cc.SortedTags { 200 val := cc.TagToCol[tag] 201 if stop := cb(tag, val); stop { 202 break 203 } 204 } 205 } 206 207 // GetByName takes the name of a column and returns the column and true if found. Otherwise InvalidCol and false are 208 // returned. 209 func (cc *ColCollection) GetByName(name string) (Column, bool) { 210 val, ok := cc.NameToCol[name] 211 212 if ok { 213 return val, true 214 } 215 216 return InvalidCol, false 217 } 218 219 // GetByNameCaseInsensitive takes the name of a column and returns the column and true if there is a column with that 220 // name ignoring case. Otherwise InvalidCol and false are returned. If multiple columns have the same case-insensitive 221 // name, the first declared one is returned. 222 func (cc *ColCollection) GetByNameCaseInsensitive(name string) (Column, bool) { 223 val, ok := cc.LowerNameToCol[strings.ToLower(name)] 224 225 if ok { 226 return val, true 227 } 228 229 return InvalidCol, false 230 } 231 232 // GetByTag takes a tag and returns the corresponding column and true if found, otherwise InvalidCol and false are 233 // returned 234 func (cc *ColCollection) GetByTag(tag uint64) (Column, bool) { 235 val, ok := cc.TagToCol[tag] 236 237 if ok { 238 return val, true 239 } 240 241 return InvalidCol, false 242 } 243 244 // GetByIndex returns the Nth column in the collection 245 func (cc *ColCollection) GetByIndex(idx int) Column { 246 return cc.cols[idx] 247 } 248 249 // GetByStoredIndex returns the Nth stored column (omitting virtual columns from index calculation) 250 func (cc *ColCollection) GetByStoredIndex(idx int) Column { 251 return cc.cols[cc.storedIndexes[idx]] 252 } 253 254 // StoredIndexByTag returns the storage index of the column with the given tag, ignoring virtual columns 255 func (cc *ColCollection) StoredIndexByTag(tag uint64) (int, bool) { 256 idx, ok := cc.tagToStorageIndex[tag] 257 return idx, ok 258 } 259 260 // Size returns the number of columns in the collection. 261 func (cc *ColCollection) Size() int { 262 return len(cc.cols) 263 } 264 265 // StoredSize returns the number of non-virtual columns in the collection 266 func (cc *ColCollection) StoredSize() int { 267 return len(cc.storedIndexes) 268 } 269 270 // Contains returns whether this column collection contains a column with the name given, case insensitive 271 func (cc *ColCollection) Contains(name string) bool { 272 _, ok := cc.GetByNameCaseInsensitive(name) 273 return ok 274 } 275 276 // ColCollsAreEqual determines whether two ColCollections are equal. 277 func ColCollsAreEqual(cc1, cc2 *ColCollection) bool { 278 if cc1.Size() != cc2.Size() { 279 return false 280 } 281 // Pks Cols need to be in the same order and equivalent. 282 for i := 0; i < cc1.Size(); i++ { 283 // Test that the columns are identical, but don't worry about tags matching, since 284 // different tags could be generated depending on how the schemas were created. 285 if !cc1.cols[i].EqualsWithoutTag(cc2.cols[i]) { 286 return false 287 } 288 } 289 return true 290 } 291 292 // MapColCollection applies a function to each column in a ColCollection and creates a new ColCollection from the results. 293 func MapColCollection(cc *ColCollection, cb func(col Column) Column) *ColCollection { 294 mapped := make([]Column, cc.Size()) 295 for i, c := range cc.cols { 296 mapped[i] = cb(c) 297 } 298 return NewColCollection(mapped...) 299 } 300 301 // FilterColCollection applies a boolean function to column in a ColCollection, it creates a new ColCollection from the 302 // set of columns for which the function returned true. 303 func FilterColCollection(cc *ColCollection, cb func(col Column) bool) *ColCollection { 304 filtered := make([]Column, 0, cc.Size()) 305 for _, c := range cc.cols { 306 if cb(c) { 307 filtered = append(filtered, c) 308 } 309 } 310 return NewColCollection(filtered...) 311 }