github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/libraries/doltcore/schema/super_schema.go (about)

     1  // Copyright 2020 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package schema
    16  
    17  import (
    18  	"errors"
    19  	"fmt"
    20  
    21  	"github.com/dolthub/dolt/go/libraries/utils/set"
    22  )
    23  
// SuperSchema is the union of all Schemas over the history of a table.
// It pairs every column that has existed (identified by tag) with the list
// of names that column has carried; the tagNames map tracks all names
// corresponding to a column tag.
type SuperSchema struct {
	// All columns that have existed in the history of the corresponding schema.
	// Names of the columns are not stored in this collection as they can change
	// over time.
	// Constraints are not tracked in this collection or anywhere in SuperSchema.
	allCols *ColCollection

	// All names in each column's history, keyed by tag.
	// The column's latest name is at index 0.
	tagNames map[uint64][]string
}
    37  
    38  // NewSuperSchema creates a SuperSchema from the columns of schemas.
    39  func NewSuperSchema(schemas ...Schema) (*SuperSchema, error) {
    40  	cc := NewColCollection()
    41  	tn := make(map[uint64][]string)
    42  	ss := SuperSchema{cc, tn}
    43  
    44  	for _, sch := range schemas {
    45  		err := ss.AddSchemas(sch)
    46  		if err != nil {
    47  			return nil, err
    48  		}
    49  	}
    50  
    51  	return &ss, nil
    52  }
    53  
    54  // UnmarshalSuperSchema creates a SuperSchema, it is only used by the encoding package.
    55  func UnmarshalSuperSchema(allCols *ColCollection, tagNames map[uint64][]string) *SuperSchema {
    56  	return &SuperSchema{allCols, tagNames}
    57  }
    58  
    59  // AddColumn adds a column and its name to the SuperSchema
    60  func (ss *SuperSchema) AddColumn(col Column) (err error) {
    61  	ct := col.Tag
    62  	ac := ss.allCols
    63  	existingCol, found := ac.GetByTag(ct)
    64  	if found {
    65  		if !existingCol.Compatible(col) {
    66  			ecName := ss.tagNames[col.Tag][0]
    67  			return fmt.Errorf("tag collision for columns %s and %s, different definitions (tag: %d)",
    68  				ecName, col.Name, col.Tag)
    69  		}
    70  	}
    71  
    72  	names, found := ss.tagNames[col.Tag]
    73  	if found {
    74  		for _, nm := range names {
    75  			if nm == col.Name {
    76  				return nil
    77  			}
    78  		}
    79  		// we haven't seen this name for this column before
    80  		ss.tagNames[col.Tag] = append([]string{col.Name}, names...)
    81  		return nil
    82  	}
    83  
    84  	// we haven't seen this column before
    85  	ss.tagNames[col.Tag] = append(names, col.Name)
    86  	ss.allCols = ss.allCols.Append(simpleColumn(col))
    87  
    88  	return err
    89  }
    90  
    91  // AddSchemas adds all names and columns of each schema to the SuperSchema
    92  func (ss *SuperSchema) AddSchemas(schemas ...Schema) error {
    93  	for _, sch := range schemas {
    94  		err := sch.GetAllCols().Iter(func(_ uint64, col Column) (stop bool, err error) {
    95  			err = ss.AddColumn(col)
    96  			stop = err != nil
    97  			return stop, err
    98  		})
    99  		if err != nil {
   100  			return err
   101  		}
   102  	}
   103  	return nil
   104  }
   105  
   106  // GetByTag returns the corresponding column and true if found, returns InvalidCol and false otherwise
   107  func (ss *SuperSchema) GetByTag(tag uint64) (Column, bool) {
   108  	return ss.allCols.GetByTag(tag)
   109  }
   110  
   111  // Iter processes each column in the SuperSchema with the specified function
   112  func (ss *SuperSchema) Iter(cb func(tag uint64, col Column) (stop bool, err error)) error {
   113  	return ss.allCols.Iter(cb)
   114  }
   115  
   116  // AllColumnNames returns all names of the column corresponding to tag
   117  func (ss *SuperSchema) AllColumnNames(tag uint64) []string {
   118  	return ss.tagNames[tag]
   119  }
   120  
   121  // AllTags returns a slice of all tags contained in the SuperSchema
   122  func (ss *SuperSchema) AllTags() []uint64 {
   123  	return ss.allCols.Tags
   124  }
   125  
   126  // LatestColumnName returns the latest name of the column corresponding to tag
   127  func (ss *SuperSchema) LatestColumnName(tag uint64) string {
   128  	return ss.tagNames[tag][0]
   129  }
   130  
   131  // Size returns the number of columns in the SuperSchema
   132  func (ss *SuperSchema) Size() int {
   133  	return ss.allCols.Size()
   134  }
   135  
   136  // Equals returns true iff the SuperSchemas have the same ColCollections and tagNames maps
   137  func (ss *SuperSchema) Equals(oss *SuperSchema) bool {
   138  	// check equality of column collections
   139  	if ss.Size() != oss.Size() {
   140  		return false
   141  	}
   142  
   143  	ssEqual := true
   144  	_ = ss.Iter(func(tag uint64, col Column) (stop bool, err error) {
   145  		otherCol, found := oss.allCols.GetByTag(tag)
   146  
   147  		if !found {
   148  			ssEqual = false
   149  		}
   150  
   151  		if !col.Equals(otherCol) {
   152  			ssEqual = false
   153  		}
   154  
   155  		return !ssEqual, nil
   156  	})
   157  
   158  	if !ssEqual {
   159  		return false
   160  	}
   161  
   162  	// check equality of column name lists
   163  	if len(ss.tagNames) != len(oss.tagNames) {
   164  		return false
   165  	}
   166  
   167  	for colTag, colNames := range ss.tagNames {
   168  		otherColNames, found := oss.tagNames[colTag]
   169  
   170  		if !found {
   171  			return false
   172  		}
   173  
   174  		if !set.NewStrSet(colNames).Equals(set.NewStrSet(otherColNames)) {
   175  			return false
   176  		}
   177  	}
   178  	return true
   179  }
   180  
   181  func (ss *SuperSchema) nameColumns() map[uint64]string {
   182  	// create a unique name for each column
   183  	collisions := make(map[string][]uint64)
   184  	uniqNames := make(map[uint64]string)
   185  	for tag, names := range ss.tagNames {
   186  		n := names[0]
   187  		uniqNames[tag] = n
   188  		collisions[n] = append(collisions[n], tag)
   189  	}
   190  	for name, tags := range collisions {
   191  		// if a name is used by more than one column, concat its tag
   192  		if len(tags) > 1 {
   193  			for _, t := range tags {
   194  				uniqNames[t] = fmt.Sprintf("%s_%d", name, t)
   195  			}
   196  		}
   197  	}
   198  	return uniqNames
   199  }
   200  
   201  // GenerateColCollection creates a ColCollection from all the columns in the SuperSchema.
   202  // Each column is assigned its latest name from its name history.
   203  func (ss *SuperSchema) GenerateColCollection() (*ColCollection, error) {
   204  	uniqNames := ss.nameColumns()
   205  	cc := NewColCollection()
   206  	err := ss.Iter(func(tag uint64, col Column) (stop bool, err error) {
   207  		col.Name = uniqNames[tag]
   208  		cc = cc.Append(col)
   209  		stop = err != nil
   210  		return stop, err
   211  	})
   212  
   213  	if err != nil {
   214  		return nil, err
   215  	}
   216  
   217  	return cc, nil
   218  }
   219  
   220  // GenerateSchema creates a Schema from all the columns in the SuperSchema.
   221  // Each column is assigned its latest name from its name history.
   222  func (ss *SuperSchema) GenerateSchema() (Schema, error) {
   223  	cc, err := ss.GenerateColCollection()
   224  	if err != nil {
   225  		return nil, err
   226  	}
   227  	return SchemaFromCols(cc)
   228  }
   229  
   230  // NameMapForSchema creates a field name mapping needed to construct a rowconv.RowConverter
   231  // sch columns are mapped by tag to the corresponding SuperSchema columns
   232  func (ss *SuperSchema) NameMapForSchema(sch Schema) (map[string]string, error) {
   233  	inNameToOutName := make(map[string]string)
   234  	uniqNames := ss.nameColumns()
   235  	allCols := sch.GetAllCols()
   236  	err := allCols.Iter(func(tag uint64, col Column) (stop bool, err error) {
   237  		_, ok := uniqNames[tag]
   238  		if !ok {
   239  			return true, errors.New("failed to map columns")
   240  		}
   241  		inNameToOutName[col.Name] = uniqNames[tag]
   242  		return false, nil
   243  	})
   244  
   245  	if err != nil {
   246  		return nil, err
   247  	}
   248  
   249  	return inNameToOutName, nil
   250  }
   251  
   252  // RebaseTag changes the tag of a column from oldTag to newTag.
   253  func (ss *SuperSchema) RebaseTag(tagMapping map[uint64]uint64) (*SuperSchema, error) {
   254  	tn := make(map[uint64][]string)
   255  	var cc []Column
   256  	err := ss.allCols.Iter(func(tag uint64, col Column) (stop bool, err error) {
   257  		if newTag, found := tagMapping[tag]; found {
   258  			col.Tag = newTag
   259  			tn[newTag] = ss.tagNames[tag]
   260  		} else {
   261  			tn[tag] = ss.tagNames[tag]
   262  		}
   263  		cc = append(cc, col)
   264  		return false, nil
   265  	})
   266  
   267  	if err != nil {
   268  		return nil, err
   269  	}
   270  
   271  	ac := NewColCollection(cc...)
   272  
   273  	return &SuperSchema{ac, tn}, nil
   274  }
   275  
   276  // SuperSchemaUnion combines multiple SuperSchemas.
   277  func SuperSchemaUnion(superSchemas ...*SuperSchema) (*SuperSchema, error) {
   278  	cc := NewColCollection()
   279  	tagNameSets := make(map[uint64]*set.StrSet)
   280  	latestNames := make(map[uint64]string)
   281  	for _, ss := range superSchemas {
   282  		err := ss.Iter(func(tag uint64, col Column) (stop bool, err error) {
   283  			_, found := cc.GetByTag(tag)
   284  
   285  			if !found {
   286  				tagNameSets[tag] = set.NewStrSet(ss.AllColumnNames(tag))
   287  				cc = cc.Append(simpleColumn(col))
   288  			} else {
   289  				tagNameSets[tag].Add(ss.AllColumnNames(tag)...)
   290  			}
   291  			latestNames[tag] = ss.AllColumnNames(tag)[0]
   292  
   293  			stop = err != nil
   294  			return stop, err
   295  		})
   296  
   297  		if err != nil {
   298  			return nil, err
   299  		}
   300  	}
   301  
   302  	tn := make(map[uint64][]string)
   303  	for tag, nameSet := range tagNameSets {
   304  		nn := []string{latestNames[tag]}
   305  		nameSet.Remove(latestNames[tag])
   306  		tn[tag] = append(nn, nameSet.AsSlice()...)
   307  	}
   308  
   309  	return &SuperSchema{cc, tn}, nil
   310  }
   311  
   312  // SuperSchema only retains basic info about the column def
   313  func simpleColumn(col Column) Column {
   314  	return Column{
   315  		// column names are tracked in SuperSchema.tagNames
   316  		Name:       "",
   317  		Tag:        col.Tag,
   318  		Kind:       col.Kind,
   319  		IsPartOfPK: col.IsPartOfPK,
   320  		TypeInfo:   col.TypeInfo,
   321  	}
   322  }