github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/libraries/doltcore/schema/super_schema.go (about) 1 // Copyright 2020 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package schema 16 17 import ( 18 "errors" 19 "fmt" 20 21 "github.com/dolthub/dolt/go/libraries/utils/set" 22 ) 23 24 // SuperSchema is the union of all Schemas over the history of a table 25 // the tagNames map tracks all names corresponding to a column tag 26 type SuperSchema struct { 27 // All columns that have existed in the history of the corresponding schema. 28 // Names of the columns are not stored in this collection as they can change 29 // over time. 30 // Constraints are not tracked in this collection or anywhere in SuperSchema 31 allCols *ColCollection 32 33 // All names in each column's history, keyed by tag. 34 // The columns latest name is index 0 35 tagNames map[uint64][]string 36 } 37 38 // NewSuperSchema creates a SuperSchema from the columns of schemas. 39 func NewSuperSchema(schemas ...Schema) (*SuperSchema, error) { 40 cc := NewColCollection() 41 tn := make(map[uint64][]string) 42 ss := SuperSchema{cc, tn} 43 44 for _, sch := range schemas { 45 err := ss.AddSchemas(sch) 46 if err != nil { 47 return nil, err 48 } 49 } 50 51 return &ss, nil 52 } 53 54 // UnmarshalSuperSchema creates a SuperSchema, it is only used by the encoding package. 55 func UnmarshalSuperSchema(allCols *ColCollection, tagNames map[uint64][]string) *SuperSchema { 56 return &SuperSchema{allCols, tagNames} 57 } 58 59 // AddColumn adds a column and its name to the SuperSchema 60 func (ss *SuperSchema) AddColumn(col Column) (err error) { 61 ct := col.Tag 62 ac := ss.allCols 63 existingCol, found := ac.GetByTag(ct) 64 if found { 65 if !existingCol.Compatible(col) { 66 ecName := ss.tagNames[col.Tag][0] 67 return fmt.Errorf("tag collision for columns %s and %s, different definitions (tag: %d)", 68 ecName, col.Name, col.Tag) 69 } 70 } 71 72 names, found := ss.tagNames[col.Tag] 73 if found { 74 for _, nm := range names { 75 if nm == col.Name { 76 return nil 77 } 78 } 79 // we haven't seen this name for this column before 80 ss.tagNames[col.Tag] = append([]string{col.Name}, names...) 81 return nil 82 } 83 84 // we haven't seen this column before 85 ss.tagNames[col.Tag] = append(names, col.Name) 86 ss.allCols = ss.allCols.Append(simpleColumn(col)) 87 88 return err 89 } 90 91 // AddSchemas adds all names and columns of each schema to the SuperSchema 92 func (ss *SuperSchema) AddSchemas(schemas ...Schema) error { 93 for _, sch := range schemas { 94 err := sch.GetAllCols().Iter(func(_ uint64, col Column) (stop bool, err error) { 95 err = ss.AddColumn(col) 96 stop = err != nil 97 return stop, err 98 }) 99 if err != nil { 100 return err 101 } 102 } 103 return nil 104 } 105 106 // GetByTag returns the corresponding column and true if found, returns InvalidCol and false otherwise 107 func (ss *SuperSchema) GetByTag(tag uint64) (Column, bool) { 108 return ss.allCols.GetByTag(tag) 109 } 110 111 // Iter processes each column in the SuperSchema with the specified function 112 func (ss *SuperSchema) Iter(cb func(tag uint64, col Column) (stop bool, err error)) error { 113 return ss.allCols.Iter(cb) 114 } 115 116 // AllColumnNames returns all names of the column corresponding to tag 117 func (ss *SuperSchema) AllColumnNames(tag uint64) []string { 118 return ss.tagNames[tag] 119 } 120 121 // AllTags returns a slice of all tags contained in the SuperSchema 122 func (ss *SuperSchema) AllTags() []uint64 { 123 return ss.allCols.Tags 124 } 125 126 // LatestColumnName returns the latest name of the column corresponding to tag 127 func (ss *SuperSchema) LatestColumnName(tag uint64) string { 128 return ss.tagNames[tag][0] 129 } 130 131 // Size returns the number of columns in the SuperSchema 132 func (ss *SuperSchema) Size() int { 133 return ss.allCols.Size() 134 } 135 136 // Equals returns true iff the SuperSchemas have the same ColCollections and tagNames maps 137 func (ss *SuperSchema) Equals(oss *SuperSchema) bool { 138 // check equality of column collections 139 if ss.Size() != oss.Size() { 140 return false 141 } 142 143 ssEqual := true 144 _ = ss.Iter(func(tag uint64, col Column) (stop bool, err error) { 145 otherCol, found := oss.allCols.GetByTag(tag) 146 147 if !found { 148 ssEqual = false 149 } 150 151 if !col.Equals(otherCol) { 152 ssEqual = false 153 } 154 155 return !ssEqual, nil 156 }) 157 158 if !ssEqual { 159 return false 160 } 161 162 // check equality of column name lists 163 if len(ss.tagNames) != len(oss.tagNames) { 164 return false 165 } 166 167 for colTag, colNames := range ss.tagNames { 168 otherColNames, found := oss.tagNames[colTag] 169 170 if !found { 171 return false 172 } 173 174 if !set.NewStrSet(colNames).Equals(set.NewStrSet(otherColNames)) { 175 return false 176 } 177 } 178 return true 179 } 180 181 func (ss *SuperSchema) nameColumns() map[uint64]string { 182 // create a unique name for each column 183 collisions := make(map[string][]uint64) 184 uniqNames := make(map[uint64]string) 185 for tag, names := range ss.tagNames { 186 n := names[0] 187 uniqNames[tag] = n 188 collisions[n] = append(collisions[n], tag) 189 } 190 for name, tags := range collisions { 191 // if a name is used by more than one column, concat its tag 192 if len(tags) > 1 { 193 for _, t := range tags { 194 uniqNames[t] = fmt.Sprintf("%s_%d", name, t) 195 } 196 } 197 } 198 return uniqNames 199 } 200 201 // GenerateColCollection creates a ColCollection from all the columns in the SuperSchema. 202 // Each column is assigned its latest name from its name history. 203 func (ss *SuperSchema) GenerateColCollection() (*ColCollection, error) { 204 uniqNames := ss.nameColumns() 205 cc := NewColCollection() 206 err := ss.Iter(func(tag uint64, col Column) (stop bool, err error) { 207 col.Name = uniqNames[tag] 208 cc = cc.Append(col) 209 stop = err != nil 210 return stop, err 211 }) 212 213 if err != nil { 214 return nil, err 215 } 216 217 return cc, nil 218 } 219 220 // GenerateSchema creates a Schema from all the columns in the SuperSchema. 221 // Each column is assigned its latest name from its name history. 222 func (ss *SuperSchema) GenerateSchema() (Schema, error) { 223 cc, err := ss.GenerateColCollection() 224 if err != nil { 225 return nil, err 226 } 227 return SchemaFromCols(cc) 228 } 229 230 // NameMapForSchema creates a field name mapping needed to construct a rowconv.RowConverter 231 // sch columns are mapped by tag to the corresponding SuperSchema columns 232 func (ss *SuperSchema) NameMapForSchema(sch Schema) (map[string]string, error) { 233 inNameToOutName := make(map[string]string) 234 uniqNames := ss.nameColumns() 235 allCols := sch.GetAllCols() 236 err := allCols.Iter(func(tag uint64, col Column) (stop bool, err error) { 237 _, ok := uniqNames[tag] 238 if !ok { 239 return true, errors.New("failed to map columns") 240 } 241 inNameToOutName[col.Name] = uniqNames[tag] 242 return false, nil 243 }) 244 245 if err != nil { 246 return nil, err 247 } 248 249 return inNameToOutName, nil 250 } 251 252 // RebaseTag changes the tag of a column from oldTag to newTag. 253 func (ss *SuperSchema) RebaseTag(tagMapping map[uint64]uint64) (*SuperSchema, error) { 254 tn := make(map[uint64][]string) 255 var cc []Column 256 err := ss.allCols.Iter(func(tag uint64, col Column) (stop bool, err error) { 257 if newTag, found := tagMapping[tag]; found { 258 col.Tag = newTag 259 tn[newTag] = ss.tagNames[tag] 260 } else { 261 tn[tag] = ss.tagNames[tag] 262 } 263 cc = append(cc, col) 264 return false, nil 265 }) 266 267 if err != nil { 268 return nil, err 269 } 270 271 ac := NewColCollection(cc...) 272 273 return &SuperSchema{ac, tn}, nil 274 } 275 276 // SuperSchemaUnion combines multiple SuperSchemas. 277 func SuperSchemaUnion(superSchemas ...*SuperSchema) (*SuperSchema, error) { 278 cc := NewColCollection() 279 tagNameSets := make(map[uint64]*set.StrSet) 280 latestNames := make(map[uint64]string) 281 for _, ss := range superSchemas { 282 err := ss.Iter(func(tag uint64, col Column) (stop bool, err error) { 283 _, found := cc.GetByTag(tag) 284 285 if !found { 286 tagNameSets[tag] = set.NewStrSet(ss.AllColumnNames(tag)) 287 cc = cc.Append(simpleColumn(col)) 288 } else { 289 tagNameSets[tag].Add(ss.AllColumnNames(tag)...) 290 } 291 latestNames[tag] = ss.AllColumnNames(tag)[0] 292 293 stop = err != nil 294 return stop, err 295 }) 296 297 if err != nil { 298 return nil, err 299 } 300 } 301 302 tn := make(map[uint64][]string) 303 for tag, nameSet := range tagNameSets { 304 nn := []string{latestNames[tag]} 305 nameSet.Remove(latestNames[tag]) 306 tn[tag] = append(nn, nameSet.AsSlice()...) 307 } 308 309 return &SuperSchema{cc, tn}, nil 310 } 311 312 // SuperSchema only retains basic info about the column def 313 func simpleColumn(col Column) Column { 314 return Column{ 315 // column names are tracked in SuperSchema.tagNames 316 Name: "", 317 Tag: col.Tag, 318 Kind: col.Kind, 319 IsPartOfPK: col.IsPartOfPK, 320 TypeInfo: col.TypeInfo, 321 } 322 }