github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/schema/schema_impl.go (about) 1 // Copyright 2019 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package schema 16 17 import ( 18 "errors" 19 "fmt" 20 "strconv" 21 "strings" 22 23 "github.com/dolthub/go-mysql-server/sql" 24 gmstypes "github.com/dolthub/go-mysql-server/sql/types" 25 "github.com/dolthub/vitess/go/vt/proto/query" 26 27 "github.com/dolthub/dolt/go/gen/fb/serial" 28 "github.com/dolthub/dolt/go/store/types" 29 "github.com/dolthub/dolt/go/store/val" 30 ) 31 32 var FeatureFlagKeylessSchema = true 33 34 // EmptySchema is an instance of a schema with no columns. 35 var EmptySchema = &schemaImpl{ 36 pkCols: EmptyColColl, 37 nonPKCols: EmptyColColl, 38 allCols: EmptyColColl, 39 indexCollection: NewIndexCollection(nil, nil), 40 } 41 42 type schemaImpl struct { 43 pkCols, nonPKCols, allCols *ColCollection 44 indexCollection IndexCollection 45 checkCollection CheckCollection 46 pkOrdinals []int 47 collation Collation 48 contentHashedFields []uint64 49 comment string 50 } 51 52 var _ Schema = (*schemaImpl)(nil) 53 54 var ErrInvalidPkOrdinals = errors.New("incorrect number of primary key ordinals") 55 var ErrMultipleNotNullConstraints = errors.New("multiple not null constraints on same column") 56 57 // NewSchema creates a fully defined schema from its parameters. 58 // This function should be updated when new components are added to Schema. 59 // If |len(pkOrdinals)| == 0, then the default ordinals are kept. |indexes| and |checks| may be nil. 60 func NewSchema(allCols *ColCollection, pkOrdinals []int, collation Collation, indexes IndexCollection, checks CheckCollection) (Schema, error) { 61 sch, err := SchemaFromCols(allCols) 62 if err != nil { 63 return nil, err 64 } 65 66 if len(pkOrdinals) != 0 { 67 err = sch.SetPkOrdinals(pkOrdinals) 68 if err != nil { 69 return nil, err 70 } 71 } 72 73 sch.SetCollation(collation) 74 75 if indexes != nil { 76 indexColImpl := indexes.(*indexCollectionImpl) 77 78 sch.(*schemaImpl).indexCollection = indexColImpl 79 80 // Index collection contains information about the total list of columns and their definitions. 81 // Do a simple sanity check here to make sure those columns match |allCols|. 82 // TODO: Add an equality check between |allCols| and the cols that |indexes| refer to. 83 84 if len(indexColImpl.pks) != sch.GetPKCols().Size() { 85 return nil, fmt.Errorf("IndexCollection referring to %d pks while Schema refers to %d pks", len(indexColImpl.pks), sch.GetPKCols().Size()) 86 } 87 for i, tag := range sch.GetPKCols().Tags { 88 if indexColImpl.pks[i] != tag { 89 return nil, fmt.Errorf("IndexCollection pk tags does not match Schema's pk tags") 90 } 91 } 92 } 93 94 if checks != nil { 95 sch.(*schemaImpl).checkCollection = checks 96 } 97 98 return sch, nil 99 } 100 101 // SchemaFromCols creates a Schema from a collection of columns 102 // 103 // Deprecated: Use NewSchema instead. 104 func SchemaFromCols(allCols *ColCollection) (Schema, error) { 105 var pkCols []Column 106 var nonPKCols []Column 107 108 defaultPkOrds := make([]int, 0) 109 for i, c := range allCols.cols { 110 if c.IsPartOfPK { 111 pkCols = append(pkCols, c) 112 defaultPkOrds = append(defaultPkOrds, i) 113 } else { 114 nonPKCols = append(nonPKCols, c) 115 } 116 } 117 118 if len(pkCols) == 0 && !FeatureFlagKeylessSchema { 119 return nil, ErrNoPrimaryKeyColumns 120 } 121 122 pkColColl := NewColCollection(pkCols...) 123 nonPKColColl := NewColCollection(nonPKCols...) 124 125 sch := SchemaFromColCollections(allCols, pkColColl, nonPKColColl) 126 err := sch.SetPkOrdinals(defaultPkOrds) 127 if err != nil { 128 return nil, err 129 } 130 sch.SetCollation(Collation_Default) 131 return sch, nil 132 } 133 134 // SchemaFromColCollections creates a schema from the three collections. 135 // 136 // Deprecated: Use NewSchema instead. 137 func SchemaFromColCollections(allCols, pkColColl, nonPKColColl *ColCollection) Schema { 138 return &schemaImpl{ 139 pkCols: pkColColl, 140 nonPKCols: nonPKColColl, 141 allCols: allCols, 142 indexCollection: NewIndexCollection(allCols, pkColColl), 143 checkCollection: NewCheckCollection(), 144 pkOrdinals: []int{}, 145 collation: Collation_Default, 146 } 147 } 148 149 func MustSchemaFromCols(typedColColl *ColCollection) Schema { 150 sch, err := SchemaFromCols(typedColColl) 151 if err != nil { 152 panic(err) 153 } 154 return sch 155 } 156 157 // ValidateColumnConstraints removes any duplicate NOT NULL column constraints from schemas. 158 func ValidateColumnConstraints(allCols *ColCollection) error { 159 for _, col := range allCols.cols { 160 seenNotNull := false 161 for _, cc := range col.Constraints { 162 if cc.GetConstraintType() == NotNullConstraintType { 163 if seenNotNull { 164 return ErrMultipleNotNullConstraints 165 } 166 seenNotNull = true 167 } 168 } 169 } 170 return nil 171 } 172 173 // ValidateForInsert returns an error if the given schema cannot be written to the dolt database. 174 func ValidateForInsert(allCols *ColCollection) error { 175 var seenPkCol bool 176 for _, c := range allCols.cols { 177 if c.IsPartOfPK { 178 seenPkCol = true 179 break 180 } 181 c.TypeInfo.ToSqlType() 182 } 183 184 if !seenPkCol && !FeatureFlagKeylessSchema { 185 return ErrNoPrimaryKeyColumns 186 } 187 188 colNames := make(map[string]bool) 189 colTags := make(map[uint64]bool) 190 191 err := allCols.Iter(func(tag uint64, col Column) (stop bool, err error) { 192 if _, ok := colTags[tag]; ok { 193 return true, ErrColTagCollision 194 } 195 colTags[tag] = true 196 197 if _, ok := colNames[strings.ToLower(col.Name)]; ok { 198 return true, ErrColNameCollision 199 } 200 colNames[col.Name] = true 201 202 if col.AutoIncrement && !(isAutoIncrementKind(col.Kind) || isAutoIncrementType(col.TypeInfo.ToSqlType().Type())) { 203 return true, ErrNonAutoIncType 204 } 205 206 return false, nil 207 }) 208 209 return err 210 } 211 212 // MaxRowStorageSize returns the storage length for Dolt types. 213 func MaxRowStorageSize(sch sql.Schema) int64 { 214 var numBytesPerRow int64 = 0 215 for _, col := range sch { 216 switch n := col.Type.(type) { 217 case sql.NumberType: 218 numBytesPerRow += 8 219 case sql.StringType: 220 if gmstypes.IsTextBlob(n) { 221 numBytesPerRow += 20 222 } else { 223 numBytesPerRow += n.MaxByteLength() 224 } 225 case gmstypes.BitType: 226 numBytesPerRow += 8 227 case sql.DatetimeType: 228 numBytesPerRow += 8 229 case sql.DecimalType: 230 numBytesPerRow += int64(n.MaximumScale()) 231 case sql.EnumType: 232 numBytesPerRow += 2 233 case gmstypes.JsonType: 234 numBytesPerRow += 20 235 case sql.NullType: 236 numBytesPerRow += 1 237 case gmstypes.TimeType: 238 numBytesPerRow += 16 239 case sql.YearType: 240 numBytesPerRow += 8 241 default: 242 panic(fmt.Sprintf("unknown type in create table: %s", n.String())) 243 } 244 } 245 return numBytesPerRow 246 } 247 248 // isAutoIncrementKind returns true is |k| is a numeric kind. 249 func isAutoIncrementKind(k types.NomsKind) bool { 250 return k == types.IntKind || k == types.UintKind || k == types.FloatKind 251 } 252 253 // isAutoIncrementType returns true is |t| is a numeric type. 254 // This is an alternative way for the numeric type check. 255 func isAutoIncrementType(t query.Type) bool { 256 switch t { 257 case query.Type_INT8, query.Type_INT16, query.Type_INT24, query.Type_INT32, query.Type_INT64, 258 query.Type_UINT8, query.Type_UINT16, query.Type_UINT24, query.Type_UINT32, query.Type_UINT64, 259 query.Type_FLOAT32, query.Type_FLOAT64, query.Type_DECIMAL: 260 return true 261 default: 262 return false 263 } 264 } 265 266 // UnkeyedSchemaFromCols creates a schema without any primary keys to be used for displaying to users, tests, etc. Such 267 // unkeyed schemas are not suitable to be inserted into storage. 268 func UnkeyedSchemaFromCols(allCols *ColCollection) Schema { 269 var nonPKCols []Column 270 271 for _, c := range allCols.cols { 272 c.IsPartOfPK = false 273 c.Constraints = nil 274 nonPKCols = append(nonPKCols, c) 275 } 276 277 pkColColl := NewColCollection() 278 nonPKColColl := NewColCollection(nonPKCols...) 279 280 return &schemaImpl{ 281 pkCols: pkColColl, 282 nonPKCols: nonPKColColl, 283 allCols: nonPKColColl, 284 indexCollection: NewIndexCollection(nil, nil), 285 checkCollection: NewCheckCollection(), 286 collation: Collation_Default, 287 } 288 } 289 290 // SchemaFromPKAndNonPKCols creates a Schema from a collection of the key columns, and the non-key columns. 291 // 292 // Deprecated: Use NewSchema instead. 293 func SchemaFromPKAndNonPKCols(pkCols, nonPKCols *ColCollection) (Schema, error) { 294 allCols := make([]Column, pkCols.Size()+nonPKCols.Size()) 295 296 i := 0 297 for _, c := range pkCols.cols { 298 if !c.IsPartOfPK { 299 panic("bug: attempting to add a column to the pk that isn't part of the pk") 300 } 301 302 allCols[i] = c 303 i++ 304 } 305 306 for _, c := range nonPKCols.cols { 307 if c.IsPartOfPK { 308 panic("bug: attempting to add a column that is part of the pk to the non-pk columns") 309 } 310 311 allCols[i] = c 312 i++ 313 } 314 315 allColColl := NewColCollection(allCols...) 316 return SchemaFromColCollections(allColColl, pkCols, nonPKCols), nil 317 } 318 319 func (si *schemaImpl) GetComment() string { 320 return si.comment 321 } 322 323 func (si *schemaImpl) SetComment(comment string) { 324 si.comment = comment 325 } 326 327 // GetAllCols gets the collection of all columns (pk and non-pk) 328 func (si *schemaImpl) GetAllCols() *ColCollection { 329 return si.allCols 330 } 331 332 // GetNonPKCols gets the collection of columns which are not part of the primary key. 333 func (si *schemaImpl) GetNonPKCols() *ColCollection { 334 return si.nonPKCols 335 } 336 337 // GetPKCols gets the collection of columns which make the primary key. 338 func (si *schemaImpl) GetPKCols() *ColCollection { 339 return si.pkCols 340 } 341 342 func (si *schemaImpl) GetPkOrdinals() []int { 343 return si.pkOrdinals 344 } 345 346 func (si *schemaImpl) SetPkOrdinals(o []int) error { 347 if si.pkCols.Size() == 0 { 348 return nil 349 } else if o == nil || len(o) != si.pkCols.Size() { 350 var found int 351 if o == nil { 352 found = 0 353 } else { 354 found = len(o) 355 } 356 return fmt.Errorf("%w: expected '%d', found '%d'", ErrInvalidPkOrdinals, si.pkCols.Size(), found) 357 } 358 359 si.pkOrdinals = o 360 newPks := make([]Column, si.pkCols.Size()) 361 newPkTags := make([]uint64, si.pkCols.Size()) 362 for i, j := range si.pkOrdinals { 363 pkCol := si.allCols.GetByIndex(j) 364 newPks[i] = pkCol 365 newPkTags[i] = pkCol.Tag 366 } 367 si.pkCols = NewColCollection(newPks...) 368 return si.indexCollection.SetPks(newPkTags) 369 } 370 371 func (si *schemaImpl) String() string { 372 var b strings.Builder 373 writeColFn := func(tag uint64, col Column) (stop bool, err error) { 374 b.WriteString("tag: ") 375 b.WriteString(strconv.FormatUint(tag, 10)) 376 b.WriteString(", name: ") 377 b.WriteString(col.Name) 378 b.WriteString(", type: ") 379 b.WriteString(col.KindString()) 380 b.WriteString(",\n") 381 return false, nil 382 } 383 b.WriteString("pkCols: [") 384 err := si.pkCols.Iter(writeColFn) 385 386 if err != nil { 387 return err.Error() 388 } 389 390 b.WriteString("]\nnonPkCols: [") 391 err = si.nonPKCols.Iter(writeColFn) 392 393 if err != nil { 394 return err.Error() 395 } 396 397 b.WriteString("]") 398 return b.String() 399 } 400 401 func (si *schemaImpl) Indexes() IndexCollection { 402 return si.indexCollection 403 } 404 405 func (si *schemaImpl) Checks() CheckCollection { 406 return si.checkCollection 407 } 408 409 func (si schemaImpl) AddColumn(newCol Column, order *ColumnOrder) (Schema, error) { 410 if newCol.IsPartOfPK { 411 return nil, fmt.Errorf("cannot add a column with that is a primary key: %s", newCol.Name) 412 } 413 414 // preserve the primary key column names in their original order, which we'll need at the end 415 keyCols := make([]string, len(si.pkOrdinals)) 416 for i, ordinal := range si.pkOrdinals { 417 keyCols[i] = si.allCols.GetByIndex(ordinal).Name 418 } 419 420 var newCols []Column 421 var pkCols []Column 422 var nonPkCols []Column 423 424 if order != nil && order.First { 425 newCols = append(newCols, newCol) 426 nonPkCols = append(nonPkCols, newCol) 427 } 428 429 for _, col := range si.GetAllCols().GetColumns() { 430 newCols = append(newCols, col) 431 if col.IsPartOfPK { 432 pkCols = append(pkCols, col) 433 } else { 434 nonPkCols = append(nonPkCols, col) 435 } 436 437 if order != nil && order.AfterColumn == col.Name { 438 newCols = append(newCols, newCol) 439 nonPkCols = append(nonPkCols, newCol) 440 } 441 } 442 443 if order == nil { 444 newCols = append(newCols, newCol) 445 nonPkCols = append(nonPkCols, newCol) 446 } 447 448 collection := NewColCollection(newCols...) 449 si.allCols = collection 450 si.pkCols = NewColCollection(pkCols...) 451 si.nonPKCols = NewColCollection(nonPkCols...) 452 453 // This must be done after we have set the new column order 454 si.pkOrdinals = primaryKeyOrdinals(&si, keyCols) 455 456 err := ValidateForInsert(collection) 457 if err != nil { 458 return nil, err 459 } 460 461 return &si, nil 462 } 463 464 // GetMapDescriptors implements the Schema interface. 465 func (si *schemaImpl) GetMapDescriptors() (keyDesc, valueDesc val.TupleDesc) { 466 keyDesc = si.GetKeyDescriptor() 467 valueDesc = si.GetValueDescriptor() 468 return 469 } 470 471 // GetKeyDescriptor implements the Schema interface. 472 func (si *schemaImpl) GetKeyDescriptor() val.TupleDesc { 473 return si.getKeyColumnsDescriptor(true) 474 } 475 476 // GetKeyDescriptorWithNoConversion implements the Schema interface. 477 func (si *schemaImpl) GetKeyDescriptorWithNoConversion() val.TupleDesc { 478 return si.getKeyColumnsDescriptor(false) 479 } 480 481 func (si *schemaImpl) getKeyColumnsDescriptor(convertAddressColumns bool) val.TupleDesc { 482 if IsKeyless(si) { 483 return val.KeylessTupleDesc 484 } 485 486 contentHashedFields := make(map[uint64]struct{}) 487 for _, tag := range si.contentHashedFields { 488 contentHashedFields[tag] = struct{}{} 489 } 490 491 var tt []val.Type 492 var handlers []val.TupleTypeHandler 493 useCollations := false // We only use collations if a string exists 494 var collations []sql.CollationID 495 _ = si.GetPKCols().Iter(func(tag uint64, col Column) (stop bool, err error) { 496 sqlType := col.TypeInfo.ToSqlType() 497 queryType := sqlType.Type() 498 var t val.Type 499 500 contentHashedField := false 501 if _, ok := contentHashedFields[tag]; ok { 502 contentHashedField = true 503 } 504 505 if convertAddressColumns && !contentHashedField && queryType == query.Type_BLOB { 506 t = val.Type{ 507 Enc: val.Encoding(EncodingFromQueryType(query.Type_VARBINARY)), 508 Nullable: columnMissingNotNullConstraint(col), 509 } 510 } else if convertAddressColumns && !contentHashedField && queryType == query.Type_TEXT { 511 t = val.Type{ 512 Enc: val.Encoding(EncodingFromQueryType(query.Type_VARCHAR)), 513 Nullable: columnMissingNotNullConstraint(col), 514 } 515 } else if convertAddressColumns && !contentHashedField && queryType == query.Type_GEOMETRY { 516 t = val.Type{ 517 Enc: val.Encoding(serial.EncodingCell), 518 Nullable: columnMissingNotNullConstraint(col), 519 } 520 } else { 521 t = val.Type{ 522 Enc: val.Encoding(EncodingFromSqlType(sqlType)), 523 Nullable: columnMissingNotNullConstraint(col), 524 } 525 } 526 tt = append(tt, t) 527 if queryType == query.Type_CHAR || queryType == query.Type_VARCHAR || queryType == query.Type_TEXT { 528 useCollations = true 529 collations = append(collations, sqlType.(sql.StringType).Collation()) 530 } else { 531 collations = append(collations, sql.Collation_Unspecified) 532 } 533 534 if extendedType, ok := sqlType.(gmstypes.ExtendedType); ok { 535 handlers = append(handlers, extendedType) 536 } else { 537 handlers = append(handlers, nil) 538 } 539 return 540 }) 541 542 if useCollations { 543 if len(collations) != len(tt) { 544 panic(fmt.Errorf("cannot create tuple descriptor from %d collations and %d types", len(collations), len(tt))) 545 } 546 cmp := CollationTupleComparator{Collations: collations} 547 return val.NewTupleDescriptorWithArgs(val.TupleDescriptorArgs{Comparator: cmp, Handlers: handlers}, tt...) 548 } else { 549 return val.NewTupleDescriptorWithArgs(val.TupleDescriptorArgs{Handlers: handlers}, tt...) 550 } 551 } 552 553 // GetValueDescriptor implements the Schema interface. 554 func (si *schemaImpl) GetValueDescriptor() val.TupleDesc { 555 var tt []val.Type 556 var handlers []val.TupleTypeHandler 557 var collations []sql.CollationID 558 if IsKeyless(si) { 559 tt = []val.Type{val.KeylessCardType} 560 handlers = []val.TupleTypeHandler{nil} 561 collations = []sql.CollationID{sql.Collation_Unspecified} 562 } 563 564 useCollations := false // We only use collations if a string exists 565 _ = si.GetNonPKCols().Iter(func(tag uint64, col Column) (stop bool, err error) { 566 if col.Virtual { 567 return 568 } 569 570 sqlType := col.TypeInfo.ToSqlType() 571 queryType := sqlType.Type() 572 tt = append(tt, val.Type{ 573 Enc: val.Encoding(EncodingFromSqlType(sqlType)), 574 Nullable: col.IsNullable(), 575 }) 576 if queryType == query.Type_CHAR || queryType == query.Type_VARCHAR { 577 useCollations = true 578 collations = append(collations, sqlType.(sql.StringType).Collation()) 579 } else { 580 collations = append(collations, sql.Collation_Unspecified) 581 } 582 583 if extendedType, ok := sqlType.(gmstypes.ExtendedType); ok { 584 handlers = append(handlers, extendedType) 585 } else { 586 handlers = append(handlers, nil) 587 } 588 return 589 }) 590 591 if useCollations { 592 if len(collations) != len(tt) { 593 panic(fmt.Errorf("cannot create tuple descriptor from %d collations and %d types", len(collations), len(tt))) 594 } 595 cmp := CollationTupleComparator{Collations: collations} 596 return val.NewTupleDescriptorWithArgs(val.TupleDescriptorArgs{Comparator: cmp, Handlers: handlers}, tt...) 597 } else { 598 return val.NewTupleDescriptorWithArgs(val.TupleDescriptorArgs{Handlers: handlers}, tt...) 599 } 600 } 601 602 // GetCollation implements the Schema interface. 603 func (si *schemaImpl) GetCollation() Collation { 604 // Schemas made before this change (and invalid schemas) will contain unspecified, so we'll the inherent collation 605 // instead (as that matches their behavior). 606 if si.collation == Collation_Unspecified { 607 return Collation_utf8mb4_0900_bin 608 } 609 return si.collation 610 } 611 612 // SetCollation implements the Schema interface. 613 func (si *schemaImpl) SetCollation(collation Collation) { 614 // Schemas made before this change may try to set this to unspecified, so we'll set it to the inherent collation. 615 if collation == Collation_Unspecified { 616 si.collation = Collation_utf8mb4_0900_bin 617 } else { 618 si.collation = collation 619 } 620 } 621 622 // indexOf returns the index of the given column in the overall schema 623 func (si *schemaImpl) indexOf(colName string) int { 624 i, idx := 0, -1 625 si.allCols.Iter(func(tag uint64, col Column) (stop bool, err error) { 626 if strings.ToLower(col.Name) == strings.ToLower(colName) { 627 idx = i 628 return true, nil 629 } 630 i++ 631 return false, nil 632 }) 633 634 return idx 635 } 636 637 // primaryKeyOrdinals returns the primary key ordinals for the schema given and the column names of the key columns 638 // given. 639 func primaryKeyOrdinals(sch *schemaImpl, keyCols []string) []int { 640 ordinals := make([]int, len(keyCols)) 641 for i, colName := range keyCols { 642 ordinals[i] = sch.indexOf(colName) 643 } 644 645 return ordinals 646 } 647 648 func columnMissingNotNullConstraint(col Column) bool { 649 for _, cnst := range col.Constraints { 650 if cnst.GetConstraintType() == NotNullConstraintType { 651 return false 652 } 653 } 654 return true 655 } 656 657 // Copy creates a copy of this schema safe to be edited independently. Some members, like column collections, are 658 // immutable and don't need to be copied. Others, like index and check collections, must be copied. 659 // We do this because it's cheaper to copy a schema than to deserialize one. 660 func (si schemaImpl) Copy() Schema { 661 pkOrds := make([]int, len(si.pkOrdinals)) 662 copy(pkOrds, si.pkOrdinals) 663 664 si.indexCollection = si.indexCollection.Copy() 665 si.checkCollection = si.checkCollection.Copy() 666 667 return &si 668 }