// Copyright 2022 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package migrate

import (
	"context"
	"fmt"

	"github.com/dolthub/go-mysql-server/sql"
	gmstypes "github.com/dolthub/go-mysql-server/sql/types"
	"github.com/dolthub/vitess/go/vt/proto/query"
	"golang.org/x/sync/errgroup"

	"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
	"github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable"
	"github.com/dolthub/dolt/go/libraries/doltcore/ref"
	"github.com/dolthub/dolt/go/libraries/doltcore/schema"
	"github.com/dolthub/dolt/go/libraries/doltcore/schema/typeinfo"
	"github.com/dolthub/dolt/go/libraries/utils/set"
	"github.com/dolthub/dolt/go/store/chunks"
	"github.com/dolthub/dolt/go/store/datas"
	"github.com/dolthub/dolt/go/store/hash"
	"github.com/dolthub/dolt/go/store/prolly"
	"github.com/dolthub/dolt/go/store/prolly/tree"
	"github.com/dolthub/dolt/go/store/types"
	"github.com/dolthub/dolt/go/store/val"
)

var (
	// flushRef is an internal ref used as a scratch head so that the new
	// database's ChunkStore can be flushed after each migrated commit
	// (see migrateCommit).
	flushRef = ref.NewInternalRef("migration-flush")
)

// migrateWorkingSet migrates the working set associated with branch brRef
// from |old| to |new|. The working and staged roots are each migrated
// against the old/new head roots, validated, and written to |new| as a
// fresh working set.
func migrateWorkingSet(ctx context.Context, menv Environment, brRef ref.BranchRef, wsRef ref.WorkingSetRef, old, new *doltdb.DoltDB) error {
	oldHead, err := old.ResolveCommitRef(ctx, brRef)
	if err != nil {
		return err
	}
	oldHeadRoot, err := oldHead.GetRootValue(ctx)
	if err != nil {
		return err
	}

	oldWs, err := old.ResolveWorkingSet(ctx, wsRef)
	if err == doltdb.ErrWorkingSetNotFound {
		// If a branch was created prior to dolt version 0.26.10, no working set will exist for it.
		// In this case, we will pretend it exists with the same root as the head commit.
		oldWs = doltdb.EmptyWorkingSet(wsRef)
		oldWs = oldWs.WithWorkingRoot(oldHeadRoot).WithStagedRoot(oldHeadRoot)
	} else if err != nil {
		return err
	}

	newHead, err := new.ResolveCommitRef(ctx, brRef)
	if err != nil {
		return err
	}
	newHeadRoot, err := newHead.GetRootValue(ctx)
	if err != nil {
		return err
	}

	// Migrate working and staged roots by diffing each against the old
	// head root and replaying the changes onto the new head root.
	wr, err := migrateRoot(ctx, menv, oldHeadRoot, oldWs.WorkingRoot(), newHeadRoot)
	if err != nil {
		return err
	}

	sr, err := migrateRoot(ctx, menv, oldHeadRoot, oldWs.StagedRoot(), newHeadRoot)
	if err != nil {
		return err
	}

	err = validateRootValue(ctx, oldHeadRoot, oldWs.WorkingRoot(), wr)
	if err != nil {
		return err
	}

	err = validateRootValue(ctx, oldHeadRoot, oldWs.StagedRoot(), sr)
	if err != nil {
		return err
	}

	newWs := doltdb.EmptyWorkingSet(wsRef).WithWorkingRoot(wr).WithStagedRoot(sr)

	// hash.Hash{} is passed as the expected previous working-set hash;
	// presumably no concurrent writers exist during migration — confirm.
	return new.UpdateWorkingSet(ctx, wsRef, newWs, hash.Hash{}, oldWs.Meta(), nil)
}

// migrateCommit migrates a single commit |oldCm| from the old database into
// |new|, recording the old->new commit hash mapping in |prog|. It is a no-op
// if |prog| already contains a mapping for the commit. Parent commits are
// assumed to have been migrated already (commits must be processed in
// topological order); a missing parent mapping is an error.
func migrateCommit(ctx context.Context, menv Environment, oldCm *doltdb.Commit, new *doltdb.DoltDB, prog *progress) error {
	oldHash, err := oldCm.HashOf()
	if err != nil {
		return err
	}

	ok, err := prog.Has(ctx, oldHash)
	if err != nil {
		return err
	} else if ok {
		return nil
	}

	// Root commits get special handling: there is no parent to diff against.
	if oldCm.NumParents() == 0 {
		return migrateInitCommit(ctx, oldCm, new, prog)
	}

	hs := oldHash.String()
	prog.Log(ctx, "migrating commit %s", hs)

	oldRoot, err := oldCm.GetRootValue(ctx)
	if err != nil {
		return err
	}

	optCmt, err := oldCm.GetParent(ctx, 0)
	if err != nil {
		return err
	}
	oldParentCm, ok := optCmt.ToCommit()
	if !ok {
		return doltdb.ErrGhostCommitEncountered
	}

	oldParentRoot, err := oldParentCm.GetRootValue(ctx)
	if err != nil {
		return err
	}

	oph, err := oldParentCm.HashOf()
	if err != nil {
		return err
	}
	ok, err = prog.Has(ctx, oph)
	if err != nil {
		return err
	} else if !ok {
		return fmt.Errorf("cannot find commit mapping for Commit (%s)", oph.String())
	}

	newParentAddr, err := prog.Get(ctx, oph)
	if err != nil {
		return err
	}
	optCmt, err = new.ReadCommit(ctx, newParentAddr)
	if err != nil {
		return err
	}
	newParentCm, ok := optCmt.ToCommit()
	if !ok {
		return doltdb.ErrGhostCommitEncountered
	}

	newParentRoot, err := newParentCm.GetRootValue(ctx)
	if err != nil {
		return err
	}

	// Replay this commit's changes (old parent -> old root) onto the
	// already-migrated parent root.
	mRoot, err := migrateRoot(ctx, menv, oldParentRoot, oldRoot, newParentRoot)
	if err != nil {
		return err
	}
	_, addr, err := new.WriteRootValue(ctx, mRoot)
	if err != nil {
		return err
	}
	// Read the root back as a noms value to pass to CommitDangling below.
	value, err := new.ValueReadWriter().ReadValue(ctx, addr)
	if err != nil {
		return err
	}

	opts, err := migrateCommitOptions(ctx, oldCm, prog)
	if err != nil {
		return err
	}

	// The commit is created "dangling" (not attached to any ref);
	// branch heads are set elsewhere once all commits are migrated.
	migratedCm, err := new.CommitDangling(ctx, value, opts)
	if err != nil {
		return err
	}

	// update progress
	newHash, err := migratedCm.HashOf()
	if err != nil {
		return err
	}
	if err = prog.Put(ctx, oldHash, newHash); err != nil {
		return err
	}

	// flush ChunkStore
	if err = new.SetHead(ctx, flushRef, newHash); err != nil {
		return err
	}
	// Best-effort GC; some ChunkStore implementations don't support it.
	err = new.ShallowGC(ctx)
	if err != nil && err != chunks.ErrUnsupportedOperation {
		return err
	}

	// validate root after we flush the ChunkStore to facilitate
	// investigating failed migrations
	if err = validateRootValue(ctx, oldParentRoot, oldRoot, mRoot); err != nil {
		return err
	}

	return nil
}

// migrateInitCommit migrates a parentless (root) commit by committing an
// empty root value to the creation branch of |new| with the original commit
// metadata, then recording the hash mapping in |prog|.
func migrateInitCommit(ctx context.Context, cm *doltdb.Commit, new *doltdb.DoltDB, prog *progress) error {
	oldHash, err := cm.HashOf()
	if err != nil {
		return err
	}

	rv, err := doltdb.EmptyRootValue(ctx, new.ValueReadWriter(), new.NodeStore())
	if err != nil {
		return err
	}

	meta, err := cm.GetCommitMeta(ctx)
	if err != nil {
		return err
	}
	datasDB := doltdb.HackDatasDatabaseFromDoltDB(new)

	creation := ref.NewInternalRef(doltdb.CreationBranch)
	ds, err := datasDB.GetDataset(ctx, creation.String())
	if err != nil {
		return err
	}
	ds, err = datasDB.Commit(ctx, ds, rv.NomsValue(), datas.CommitOptions{Meta: meta})
	if err != nil {
		return err
	}

	// Resolve the commit we just created to obtain its hash.
	newCm, err := new.ResolveCommitRef(ctx, creation)
	if err != nil {
		return err
	}
	newHash, err := newCm.HashOf()
	if err != nil {
		return err
	}

	return prog.Put(ctx, oldHash, newHash)
}

// migrateCommitOptions builds the datas.CommitOptions for the migrated copy
// of |oldCm|: the same commit metadata, with each parent hash rewritten to
// its migrated counterpart via |prog|. Panics if |oldCm| has no parents
// (root commits are handled by migrateInitCommit).
func migrateCommitOptions(ctx context.Context, oldCm *doltdb.Commit, prog *progress) (datas.CommitOptions, error) {
	parents, err := oldCm.ParentHashes(ctx)
	if err != nil {
		return datas.CommitOptions{}, err
	}
	if len(parents) == 0 {
		panic("expected non-zero parents list")
	}

	for i := range parents {
		migrated, err := prog.Get(ctx, parents[i])
		if err != nil {
			return datas.CommitOptions{}, err
		}
		parents[i] = migrated
	}

	meta, err := oldCm.GetCommitMeta(ctx)
	if err != nil {
		return datas.CommitOptions{}, err
	}

	return datas.CommitOptions{
		Parents: parents,
		Meta:    meta,
	}, nil
}

// migrateRoot migrates |oldRoot| by replaying its delta against |oldParent|
// on top of |newParent| (the already-migrated counterpart of |oldParent|).
// Foreign keys are copied wholesale, dropped/renamed tables are removed,
// and every table in |oldRoot| is migrated via migrateTable. Tables with
// conflicts are an error unless menv.DropConflicts is set.
func migrateRoot(ctx context.Context, menv Environment, oldParent, oldRoot, newParent doltdb.RootValue) (doltdb.RootValue, error) {
	migrated := newParent

	fkc, err := oldRoot.GetForeignKeyCollection(ctx)
	if err != nil {
		return nil, err
	}

	migrated, err = migrated.PutForeignKeyCollection(ctx, fkc)
	if err != nil {
		return nil, err
	}

	// Drop tables that disappeared between oldParent and oldRoot
	// (this includes the old names of renamed tables).
	removedTables, err := getRemovedTableNames(ctx, oldParent, oldRoot)
	if err != nil {
		return nil, err
	}

	migrated, err = migrated.RemoveTables(ctx, true, false, removedTables...)
	if err != nil {
		return nil, err
	}

	err = oldRoot.IterTables(ctx, func(name string, oldTbl *doltdb.Table, sch schema.Schema) (bool, error) {
		ok, err := oldTbl.HasConflicts(ctx)
		if err != nil {
			return true, err
		} else if ok && !menv.DropConflicts {
			return true, fmt.Errorf("cannot migrate table with conflicts (%s)", name)
		}

		newSch, err := migrateSchema(ctx, name, sch)
		if err != nil {
			return true, err
		}
		if err = validateSchema(newSch); err != nil {
			return true, err
		}

		// if there was a schema change in this commit,
		// diff against an empty table and rewrite everything
		var parentSch schema.Schema

		oldParentTbl, ok, err := oldParent.GetTable(ctx, doltdb.TableName{Name: name})
		if err != nil {
			return true, err
		}
		if ok {
			parentSch, err = oldParentTbl.GetSchema(ctx)
			if err != nil {
				return true, err
			}
		}
		if !ok || !schema.SchemasAreEqual(sch, parentSch) {
			// provide empty table to diff against
			oldParentTbl, err = doltdb.NewEmptyTable(ctx, oldParent.VRW(), oldParent.NodeStore(), sch)
			if err != nil {
				return true, err
			}
		}

		newParentTbl, ok, err := newParent.GetTable(ctx, doltdb.TableName{Name: name})
		if err != nil {
			return true, err
		}
		if !ok || !schema.SchemasAreEqual(sch, parentSch) {
			// provide empty table to diff against
			newParentTbl, err = doltdb.NewEmptyTable(ctx, newParent.VRW(), newParent.NodeStore(), newSch)
			if err != nil {
				return true, err
			}
		}

		mtbl, err := migrateTable(ctx, newSch, oldParentTbl, oldTbl, newParentTbl)
		if err != nil {
			return true, err
		}

		migrated, err = migrated.PutTable(ctx, doltdb.TableName{Name: name}, mtbl)
		if err != nil {
			return true, err
		}
		// false => keep iterating.
		return false, nil
	})
	if err != nil {
		return nil, err
	}

	return migrated, nil
}

// getRemovedTableNames returns the names of tables present in |prev| but
// absent from |curr|. Renamed tables also get returned here (under their
// old names), since a rename appears as a remove plus an add.
func getRemovedTableNames(ctx context.Context, prev, curr doltdb.RootValue) ([]string, error) {
	prevNames, err := prev.GetTableNames(ctx, doltdb.DefaultSchemaName)
	if err != nil {
		return nil, err
	}
	tblNameSet := set.NewStrSet(prevNames)
	currNames, err := curr.GetTableNames(ctx, doltdb.DefaultSchemaName)
	if err != nil {
		return nil, err
	}
	tblNameSet.Remove(currNames...)
	return tblNameSet.AsSlice(), nil
}

// migrateTable migrates the primary row index and all secondary indexes of
// |oldTbl| onto |newParentTbl|, producing a new table with schema |newSch|.
// Old-format (noms) maps are diffed and the deltas are translated into the
// new-format (prolly) maps. Row data and index-set migration run
// concurrently in an errgroup.
func migrateTable(ctx context.Context, newSch schema.Schema, oldParentTbl, oldTbl, newParentTbl *doltdb.Table) (*doltdb.Table, error) {
	idx, err := oldParentTbl.GetRowData(ctx)
	if err != nil {
		return nil, err
	}
	oldParentRows := durable.NomsMapFromIndex(idx)

	idx, err = oldTbl.GetRowData(ctx)
	if err != nil {
		return nil, err
	}
	oldRows := durable.NomsMapFromIndex(idx)

	idx, err = newParentTbl.GetRowData(ctx)
	if err != nil {
		return nil, err
	}
	newParentRows := durable.ProllyMapFromIndex(idx)

	oldParentSet, err := oldParentTbl.GetIndexSet(ctx)
	if err != nil {
		return nil, err
	}

	oldSet, err := oldTbl.GetIndexSet(ctx)
	if err != nil {
		return nil, err
	}

	newParentSet, err := newParentTbl.GetIndexSet(ctx)
	if err != nil {
		return nil, err
	}

	var newRows durable.Index
	var newSet durable.IndexSet
	// Save the incoming ctx before shadowing it with the errgroup's
	// derived ctx: post-Wait work must not use the derived (possibly
	// canceled) context.
	originalCtx := ctx
	eg, ctx := errgroup.WithContext(ctx)

	eg.Go(func() error {
		var merr error
		newRows, merr = migrateIndex(ctx, newSch, oldParentRows, oldRows, newParentRows, newParentTbl.NodeStore())
		return merr
	})

	vrw, ns := newParentTbl.ValueReadWriter(), newParentTbl.NodeStore()

	eg.Go(func() error {
		var merr error
		newSet, merr = migrateIndexSet(ctx, newSch, oldParentSet, oldSet, newParentSet, vrw, ns)
		return merr
	})

	if err = eg.Wait(); err != nil {
		return nil, err
	}

	ai, err := oldTbl.GetAutoIncrementValue(originalCtx)
	if err != nil {
		return nil, err
	}
	autoInc := types.Uint(ai)

	return doltdb.NewTable(originalCtx, vrw, ns, newSch, newRows, newSet, autoInc)
}

// migrateSchema patches |existing| so that it is representable in the new
// storage format:
//   - for dolt_-prefixed system tables, TEXT columns backed by StringKind
//     are rewritten (NewColumn derives VARCHAR from the NomsKind);
//   - blob types in index-key positions are replaced (TEXT -> VARCHAR(max),
//     BLOB -> VARBINARY(max)) since blobs cannot be index keys;
//   - string-typed columns are force-set to a binary collation
//     (utf8mb4_0900_bin / binary) to match __LD_1__ sort order.
//
// Returns |existing| unchanged when no patching is needed.
func migrateSchema(ctx context.Context, tableName string, existing schema.Schema) (schema.Schema, error) {
	// dolt_schemas and dolt_docs previously included columns with
	// SQL type TEXT, but NomsKind of StringKind
	if doltdb.HasDoltPrefix(tableName) {
		var patched bool
		cols := existing.GetAllCols().GetColumns()
		for i, c := range cols {
			qt := c.TypeInfo.ToSqlType().Type()
			if qt == query.Type_TEXT && c.Kind == types.StringKind {
				// NewColumn picks SQL type from NomsKind, converting this TEXT column to VARCHAR
				cols[i] = schema.NewColumn(c.Name, c.Tag, c.Kind, c.IsPartOfPK, c.Constraints...)
				patched = true
			}
		}
		if patched {
			allCols := schema.NewColCollection(cols...)
			// NOTE(review): this call's result is discarded — it looks
			// like dead code, or is relied on for a side effect; confirm
			// against schema.NewIndexCollection before removing.
			schema.NewIndexCollection(allCols, existing.GetPKCols())
			return schema.NewSchema(
				allCols,
				existing.GetPkOrdinals(),
				existing.GetCollation(),
				existing.Indexes(),
				existing.Checks(),
			)
		}
		return existing, nil
	}

	// Blob types cannot be index keys in the new format:
	// substitute VARCHAR(max) for TEXT, VARBINARY(max) for BLOB
	// TODO: print warning to users
	var patched bool
	tags := schema.GetKeyColumnTags(existing)
	cols := existing.GetAllCols().GetColumns()
	for i, c := range cols {
		if tags.Contains(c.Tag) {
			var err error
			switch c.TypeInfo.ToSqlType().Type() {
			case query.Type_TEXT:
				patched = true
				info := typeinfo.StringDefaultType
				cols[i], err = schema.NewColumnWithTypeInfo(c.Name, c.Tag, info, c.IsPartOfPK, c.Default, c.AutoIncrement, c.Comment, c.Constraints...)
			case query.Type_BLOB:
				patched = true
				info := typeinfo.VarbinaryDefaultType
				cols[i], err = schema.NewColumnWithTypeInfo(c.Name, c.Tag, info, c.IsPartOfPK, c.Default, c.AutoIncrement, c.Comment, c.Constraints...)
			}
			if err != nil {
				return nil, err
			}
		}
	}

	// String types are sorted using a binary collation in __LD_1__
	// force-set collation to utf8mb4_0900_bin to match the order
	for i, c := range cols {
		st, ok := c.TypeInfo.ToSqlType().(sql.StringType)
		if !ok {
			continue
		}
		patched = true

		var err error
		switch st.Type() {
		case query.Type_CHAR, query.Type_VARCHAR, query.Type_TEXT:
			st, err = gmstypes.CreateString(st.Type(), st.Length(), sql.Collation_utf8mb4_0900_bin)
		case query.Type_BINARY, query.Type_VARBINARY, query.Type_BLOB:
			st, err = gmstypes.CreateString(st.Type(), st.Length(), sql.Collation_binary)
		}
		if err != nil {
			return nil, err
		}

		info, err := typeinfo.FromSqlType(st)
		if err != nil {
			return nil, err
		}

		cols[i], err = schema.NewColumnWithTypeInfo(c.Name, c.Tag, info, c.IsPartOfPK, c.Default, c.AutoIncrement, c.Comment, c.Constraints...)
		if err != nil {
			return nil, err
		}
	}

	if !patched {
		return existing, nil
	}

	sch, err := schema.NewSchema(
		schema.NewColCollection(cols...),
		existing.GetPkOrdinals(),
		existing.GetCollation(),
		existing.Indexes(),
		existing.Checks(),
	)
	if err != nil {
		return nil, err
	}

	return sch, nil
}

// migrateIndexSet migrates every secondary index declared in |sch| from the
// old-format index sets into a fresh durable.IndexSet, diffing each old
// index against its old-parent version and replaying onto the new-parent
// version (same strategy as the primary index, see migrateIndex).
func migrateIndexSet(
	ctx context.Context,
	sch schema.Schema,
	oldParentSet, oldSet, newParentSet durable.IndexSet,
	vrw types.ValueReadWriter, ns tree.NodeStore,
) (durable.IndexSet, error) {
	newSet, err := durable.NewIndexSet(ctx, vrw, ns)
	if err != nil {
		return nil, err
	}
	for _, def := range sch.Indexes().AllIndexes() {
		idx, err := oldParentSet.GetIndex(ctx, sch, def.Name())
		if err != nil {
			return nil, err
		}
		oldParent := durable.NomsMapFromIndex(idx)

		idx, err = oldSet.GetIndex(ctx, sch, def.Name())
		if err != nil {
			return nil, err
		}
		old := durable.NomsMapFromIndex(idx)

		idx, err = newParentSet.GetIndex(ctx, sch, def.Name())
		if err != nil {
			return nil, err
		}
		newParent := durable.ProllyMapFromIndex(idx)

		newIdx, err := migrateIndex(ctx, def.Schema(), oldParent, old, newParent, ns)
		if err != nil {
			return nil, err
		}

		newSet, err = newSet.PutIndex(ctx, def.Name(), newIdx)
		if err != nil {
			return nil, err
		}
	}
	return newSet, nil
}

// migrateIndex diffs |oldMap| against |oldParent| (old noms format),
// translates each changed tuple into the new tuple format, and applies the
// changes to |newParent| (new prolly format). The three stages — diff,
// translate, write — run as a pipeline of goroutines connected by buffered
// channels; the errgroup cancels the pipeline on first error.
func migrateIndex(
	ctx context.Context,
	sch schema.Schema,
	oldParent, oldMap types.Map,
	newParent prolly.Map,
	ns tree.NodeStore,
) (durable.Index, error) {

	eg, ctx := errgroup.WithContext(ctx)
	differ := make(chan types.ValueChanged, 256)
	writer := make(chan val.Tuple, 256)

	kt, vt := tupleTranslatorsFromSchema(sch, ns)

	// read old noms map
	eg.Go(func() error {
		defer close(differ)
		return oldMap.Diff(ctx, oldParent, differ)
	})

	// translate noms tuples to prolly tuples
	eg.Go(func() error {
		defer close(writer)
		return translateTuples(ctx, kt, vt, differ, writer)
	})

	var newMap prolly.Map
	// write tuples in new prolly map
	eg.Go(func() (err error) {
		newMap, err = writeProllyMap(ctx, newParent, writer)
		return
	})

	if err := eg.Wait(); err != nil {
		return nil, err
	}

	return durable.IndexFromProllyMap(newMap), nil
}

// translateTuples consumes diffs from |differ|, converts the noms key/value
// tuples into new-format tuples with |kt|/|vt|, and emits them to |writer|
// as a (key, value) pair per diff. For removals only the key is translated;
// the value stays a nil val.Tuple, which presumably signals deletion to the
// downstream writer (see writeProllyMap) — confirm against
// prolly MutableMap.Put semantics. Returns ctx.Err() on cancellation and
// nil when |differ| is closed.
func translateTuples(ctx context.Context, kt, vt translator, differ <-chan types.ValueChanged, writer chan<- val.Tuple) error {
	for {
		var (
			diff   types.ValueChanged
			newKey val.Tuple
			newVal val.Tuple
			ok     bool
			err    error
		)

		select {
		case diff, ok = <-differ:
			if !ok {
				return nil // done
			}
		case _ = <-ctx.Done():
			return ctx.Err()
		}

		// Adds and modifications translate both value and key
		// (via fallthrough); removals translate only the key.
		switch diff.ChangeType {
		case types.DiffChangeAdded:
			fallthrough

		case types.DiffChangeModified:
			newVal, err = vt.TranslateTuple(ctx, diff.NewValue.(types.Tuple))
			if err != nil {
				return err
			}
			fallthrough

		case types.DiffChangeRemoved:
			newKey, err = kt.TranslateTuple(ctx, diff.Key.(types.Tuple))
			if err != nil {
				return err
			}
		}

		// Emit the pair: key first, then value. The writer relies on
		// this strict pairing (see writeProllyMap).
		select {
		case writer <- newKey:
		case _ = <-ctx.Done():
			return ctx.Err()
		}

		select {
		case writer <- newVal:
		case _ = <-ctx.Done():
			return ctx.Err()
		}
	}
}

// writeProllyMap drains (key, value) tuple pairs from |writer| and applies
// them to a mutable copy of |prev|, materializing the final prolly.Map once
// the channel closes. Tuples are expected in strict key/value alternation;
// assertTrue guards against the channel closing mid-pair.
//
// NOTE(review): on ctx cancellation this returns a zero Map with a nil
// error; callers observe the real failure via the errgroup in migrateIndex
// (another goroutine's error triggered the cancel) — confirm.
func writeProllyMap(ctx context.Context, prev prolly.Map, writer <-chan val.Tuple) (m prolly.Map, err error) {
	var (
		k, v val.Tuple
		ok   bool
	)

	mut := prev.Mutate()
	for {
		select {
		case k, ok = <-writer:
			if !ok {
				m, err = mut.Map(ctx)
				return // done
			}
		case <-ctx.Done():
			return
		}

		select {
		case v, ok = <-writer:
			assertTrue(ok)
		case <-ctx.Done():
			return
		}
		if err = mut.Put(ctx, k, v); err != nil {
			return
		}
	}
}