github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/schema/schema.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package schema
    16  
    17  import (
    18  	"fmt"
    19  	"strings"
    20  
    21  	"github.com/dolthub/vitess/go/vt/proto/query"
    22  	"gopkg.in/src-d/go-errors.v1"
    23  
    24  	"github.com/dolthub/dolt/go/libraries/utils/set"
    25  	"github.com/dolthub/dolt/go/store/types"
    26  	"github.com/dolthub/dolt/go/store/val"
    27  )
    28  
// Schema defines the schema of a table and describes both its SQL schema and storage layout.
//
// For example, a SQL table defined as:
//
//	`CREATE TABLE t (a int, b int, pk2 int, c int, pk1 int, PRIMARY KEY (pk1, pk2));`
//
// Has a corresponding Schema of:
//
//	Schema {
//		PkCols:     [pk1, pk2],
//		NonPkCols:  [a, b, c],
//		AllCols:    [a, b, pk2, c, pk1],
//		PkOrdinals: [4, 2],
//	}
type Schema interface {
	// GetPKCols gets the collection of columns which make up the primary key.
	// Columns in this collection are ordered by storage order, which is
	// defined in the 'PRIMARY KEY(...)' clause of a CREATE TABLE statement.
	GetPKCols() *ColCollection

	// GetNonPKCols gets the collection of columns which are not part of the primary key.
	// Columns in this collection are ordered by schema order (display order), which is
	// defined by the order of first occurrence in a CREATE TABLE statement.
	GetNonPKCols() *ColCollection

	// GetAllCols gets the collection of all columns (pk and non-pk).
	// Columns in this collection are ordered by schema order (display order), which is
	// defined by the order of first occurrence in a CREATE TABLE statement.
	GetAllCols() *ColCollection

	// Indexes returns a collection of all indexes on the table that this schema belongs to.
	Indexes() IndexCollection

	// Checks returns a collection of all check constraints on the table that this schema belongs to.
	Checks() CheckCollection

	// GetPkOrdinals returns a slice of schema order positions for the primary key columns. The ith
	// value of this slice contains the schema position of the ith column in the PK ColCollection.
	GetPkOrdinals() []int

	// SetPkOrdinals specifies a primary key column ordering. See GetPkOrdinals.
	SetPkOrdinals([]int) error

	// AddColumn adds a column to this schema in the order given and returns the resulting Schema.
	// The new column cannot be a primary key. To alter primary keys, create a new schema with those keys.
	AddColumn(column Column, order *ColumnOrder) (Schema, error)

	// GetMapDescriptors returns the key and value tuple descriptors for this schema.
	GetMapDescriptors() (keyDesc, valueDesc val.TupleDesc)

	// GetKeyDescriptor returns the key tuple descriptor for this schema.
	// If a column has a type that can't appear in a key (such as "address" columns),
	// that column will get converted to an equivalent type that can. (Example: text -> varchar)
	GetKeyDescriptor() val.TupleDesc

	// GetKeyDescriptorWithNoConversion returns a descriptor for the columns used in the key.
	// Unlike `GetKeyDescriptor`, it doesn't attempt to convert columns if they can't appear in a key,
	// and returns them as they are.
	GetKeyDescriptorWithNoConversion() val.TupleDesc

	// GetValueDescriptor returns the value tuple descriptor for this schema.
	GetValueDescriptor() val.TupleDesc

	// GetCollation returns the table's collation.
	GetCollation() Collation

	// SetCollation sets the table's collation.
	SetCollation(collation Collation)

	// GetComment returns the table's comment.
	GetComment() string

	// SetComment sets the table's comment.
	SetComment(comment string)

	// Copy returns a copy of this Schema that can be safely modified independently.
	Copy() Schema
}
   107  
// ColumnOrder is used in ALTER TABLE statements to change the order of inserted / modified columns.
// It carries the requested placement: either at the front of the column list (First) or
// immediately after a named column (AfterColumn).
type ColumnOrder struct {
	First       bool   // True if this column should come first
	AfterColumn string // Set to the name of the column after which this column should appear
}
   113  
   114  // ColFromTag returns a schema.Column from a schema and a tag
   115  func ColFromTag(sch Schema, tag uint64) (Column, bool) {
   116  	return sch.GetAllCols().GetByTag(tag)
   117  }
   118  
   119  // ColFromName returns a schema.Column from a schema from it's name
   120  func ColFromName(sch Schema, name string) (Column, bool) {
   121  	return sch.GetAllCols().GetByName(name)
   122  }
   123  
   124  // ExtractAllColNames returns a map of tag to column name, with one map entry for every column in the schema.
   125  func ExtractAllColNames(sch Schema) (map[uint64]string, error) {
   126  	colNames := make(map[uint64]string)
   127  	err := sch.GetAllCols().Iter(func(tag uint64, col Column) (stop bool, err error) {
   128  		colNames[tag] = col.Name
   129  		return false, nil
   130  	})
   131  
   132  	if err != nil {
   133  		return nil, err
   134  	}
   135  
   136  	return colNames, nil
   137  }
   138  
   139  func IsKeyless(sch Schema) bool {
   140  	return sch != nil &&
   141  		sch.GetPKCols().Size() == 0 &&
   142  		sch.GetAllCols().Size() != 0
   143  }
   144  
   145  func IsVirtual(sch Schema) bool {
   146  	return sch != nil && len(sch.GetAllCols().virtualColumns) > 0
   147  }
   148  
   149  func HasAutoIncrement(sch Schema) (ok bool) {
   150  	_ = sch.GetAllCols().Iter(func(tag uint64, col Column) (stop bool, err error) {
   151  		if col.AutoIncrement {
   152  			ok = true
   153  			stop = true
   154  		}
   155  		return
   156  	})
   157  	return
   158  }
   159  
   160  // GetAutoIncrementColumn returns the auto increment column if one exists, with an existence boolean
   161  func GetAutoIncrementColumn(sch Schema) (col Column, ok bool) {
   162  	var aiCol Column
   163  	var found bool
   164  	_ = sch.GetAllCols().Iter(func(tag uint64, col Column) (stop bool, err error) {
   165  		if col.AutoIncrement {
   166  			aiCol = col
   167  			found = true
   168  			stop = true
   169  		}
   170  		return
   171  	})
   172  
   173  	return aiCol, found
   174  }
   175  
   176  // SchemasAreEqual tests equality of two schemas.
   177  func SchemasAreEqual(sch1, sch2 Schema) bool {
   178  	if sch1 == nil && sch2 == nil {
   179  		return true
   180  	} else if sch1 == nil || sch2 == nil {
   181  		return false
   182  	}
   183  	colCollIsEqual := ColCollsAreEqual(sch1.GetAllCols(), sch2.GetAllCols())
   184  	if !colCollIsEqual {
   185  		return false
   186  	}
   187  
   188  	// Pks and Non-pks are in the same order as the key tuple and value tuple fields
   189  	if !ColCollsAreEqual(sch1.GetPKCols(), sch2.GetPKCols()) {
   190  		return false
   191  	}
   192  
   193  	if !ColCollsAreEqual(sch1.GetNonPKCols(), sch2.GetNonPKCols()) {
   194  		return false
   195  	}
   196  
   197  	if sch1.GetCollation() != sch2.GetCollation() {
   198  		return false
   199  	}
   200  
   201  	if (sch1.Checks() == nil) != (sch2.Checks() == nil) {
   202  		return false
   203  	}
   204  
   205  	if sch1.Checks() != nil && sch2.Checks() != nil &&
   206  		!sch1.Checks().Equals(sch2.Checks()) {
   207  		return false
   208  	}
   209  
   210  	return sch1.Indexes().Equals(sch2.Indexes())
   211  }
   212  
   213  // TODO: this function never returns an error
   214  // VerifyInSchema tests that the incoming schema matches the schema from the original table
   215  // based on the presence of the column name in the original schema.
   216  func VerifyInSchema(inSch, outSch Schema) (bool, error) {
   217  	inSchCols := inSch.GetAllCols()
   218  	outSchCols := outSch.GetAllCols()
   219  
   220  	if inSchCols.Size() != outSchCols.Size() {
   221  		return false, nil
   222  	}
   223  
   224  	match := true
   225  	err := inSchCols.Iter(func(tag uint64, inCol Column) (stop bool, err error) {
   226  		_, isValid := outSchCols.GetByNameCaseInsensitive(inCol.Name)
   227  
   228  		if !isValid {
   229  			match = false
   230  			return true, nil
   231  		}
   232  
   233  		return false, nil
   234  	})
   235  
   236  	if err != nil {
   237  		return false, err
   238  	}
   239  
   240  	return match, nil
   241  }
   242  
   243  // GetSharedCols return all columns in the schema that match the names and types given, which are parallel arrays
   244  // specifying columns to match.
   245  func GetSharedCols(schema Schema, cmpNames []string, cmpKinds []types.NomsKind) []Column {
   246  	existingCols := make(map[string]Column)
   247  
   248  	var shared []Column
   249  	_ = schema.GetAllCols().Iter(func(tag uint64, col Column) (stop bool, err error) {
   250  		existingCols[col.Name] = col
   251  		return false, nil
   252  	})
   253  
   254  	for i, colName := range cmpNames {
   255  		if col, ok := existingCols[colName]; ok {
   256  			if col.Kind == cmpKinds[i] && strings.ToLower(col.Name) == strings.ToLower(cmpNames[i]) {
   257  				shared = append(shared, col)
   258  			}
   259  		}
   260  	}
   261  
   262  	return shared
   263  }
   264  
   265  // ArePrimaryKeySetsDiffable checks if two schemas are diffable. Assumes the
   266  // passed in schema are from the same table between commits. If __DOLT__, then
   267  // it also checks if the underlying SQL types of the columns are equal.
   268  func ArePrimaryKeySetsDiffable(format *types.NomsBinFormat, fromSch, toSch Schema) bool {
   269  	if fromSch == nil && toSch == nil {
   270  		return false
   271  		// Empty case
   272  	} else if fromSch == nil || fromSch.GetAllCols().Size() == 0 ||
   273  		toSch == nil || toSch.GetAllCols().Size() == 0 {
   274  		return true
   275  	}
   276  
   277  	// Keyless case for comparing
   278  	if IsKeyless(fromSch) && IsKeyless(toSch) {
   279  		return true
   280  	}
   281  
   282  	cc1 := fromSch.GetPKCols()
   283  	cc2 := toSch.GetPKCols()
   284  
   285  	if cc1.Size() != cc2.Size() {
   286  		return false
   287  	}
   288  
   289  	for i := 0; i < cc1.Size(); i++ {
   290  		c1 := cc1.GetByIndex(i)
   291  		c2 := cc2.GetByIndex(i)
   292  		if (c1.Tag != c2.Tag) || (c1.IsPartOfPK != c2.IsPartOfPK) {
   293  			return false
   294  		}
   295  		if types.IsFormat_DOLT(format) && !c1.TypeInfo.ToSqlType().Equals(c2.TypeInfo.ToSqlType()) {
   296  			return false
   297  		}
   298  	}
   299  
   300  	return true
   301  }
   302  
   303  // MapSchemaBasedOnTagAndName can be used to map column values from one schema
   304  // to another schema. A primary key column in |inSch| is mapped to |outSch| if
   305  // they share the same tag. A non-primary key column in |inSch| is mapped to
   306  // |outSch| purely based on the name. It returns ordinal mappings that can be
   307  // use to map key, value val.Tuple's of schema |inSch| to |outSch|. The first
   308  // ordinal map is for keys, and the second is for values. If a column of |inSch|
   309  // is missing in |outSch| then that column's index in the ordinal map holds -1.
   310  func MapSchemaBasedOnTagAndName(inSch, outSch Schema) ([]int, []int, error) {
   311  	keyMapping := make([]int, inSch.GetPKCols().Size())
   312  	valMapping := make([]int, inSch.GetNonPKCols().Size())
   313  
   314  	// if inSch or outSch is empty schema. This can be from added or dropped table.
   315  	if len(inSch.GetAllCols().cols) == 0 || len(outSch.GetAllCols().cols) == 0 {
   316  		return keyMapping, valMapping, nil
   317  	}
   318  
   319  	err := inSch.GetPKCols().Iter(func(tag uint64, col Column) (stop bool, err error) {
   320  		i := inSch.GetPKCols().TagToIdx[tag]
   321  		if foundCol, ok := outSch.GetPKCols().GetByTag(tag); ok {
   322  			j := outSch.GetPKCols().TagToIdx[foundCol.Tag]
   323  			keyMapping[i] = j
   324  		} else {
   325  			return true, fmt.Errorf("could not map primary key column %s", col.Name)
   326  		}
   327  		return false, nil
   328  	})
   329  	if err != nil {
   330  		return nil, nil, err
   331  	}
   332  
   333  	err = inSch.GetNonPKCols().Iter(func(tag uint64, col Column) (stop bool, err error) {
   334  		i := inSch.GetNonPKCols().TagToIdx[col.Tag]
   335  		if col, ok := outSch.GetNonPKCols().GetByName(col.Name); ok {
   336  			j := outSch.GetNonPKCols().TagToIdx[col.Tag]
   337  			valMapping[i] = j
   338  		} else {
   339  			valMapping[i] = -1
   340  		}
   341  		return false, nil
   342  	})
   343  	if err != nil {
   344  		return nil, nil, err
   345  	}
   346  
   347  	return keyMapping, valMapping, nil
   348  }
   349  
// ErrUsingSpatialKey is the error kind for a table that uses a spatial type as a primary key.
// Its message format takes the table name.
var ErrUsingSpatialKey = errors.NewKind("can't use Spatial Types as Primary Key for table %s")
   351  
   352  // IsColSpatialType returns whether a column's type is a spatial type
   353  func IsColSpatialType(c Column) bool {
   354  	return c.TypeInfo.ToSqlType().Type() == query.Type_GEOMETRY
   355  }
   356  
   357  // IsUsingSpatialColAsKey is a utility function that checks for any spatial types being used as a primary key
   358  func IsUsingSpatialColAsKey(sch Schema) bool {
   359  	pkCols := sch.GetPKCols()
   360  	cols := pkCols.GetColumns()
   361  	for _, c := range cols {
   362  		if IsColSpatialType(c) {
   363  			return true
   364  		}
   365  	}
   366  	return false
   367  }
   368  
   369  // CopyChecksConstraints copies check constraints from the |from| schema to the |to| schema and returns it
   370  func CopyChecksConstraints(from, to Schema) Schema {
   371  	fromSch, toSch := from.(*schemaImpl), to.(*schemaImpl)
   372  	toSch.checkCollection = fromSch.checkCollection
   373  	return toSch
   374  }
   375  
   376  // CopyIndexes copies secondary indexes from the |from| schema to the |to| schema and returns it
   377  func CopyIndexes(from, to Schema) Schema {
   378  	fromSch, toSch := from.(*schemaImpl), to.(*schemaImpl)
   379  	toSch.indexCollection = fromSch.indexCollection
   380  	return toSch
   381  }
   382  
   383  // GetKeyColumnTags returns a set.Uint64Set containing the column tags
   384  // of every key column of every primary and secondary index in |sch|.
   385  func GetKeyColumnTags(sch Schema) *set.Uint64Set {
   386  	tags := set.NewUint64Set(sch.GetPKCols().Tags)
   387  	_ = sch.Indexes().Iter(func(index Index) (stop bool, err error) {
   388  		tags.Add(index.IndexedColumnTags()...)
   389  		return
   390  	})
   391  	return tags
   392  }