github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ccl/importccl/import_stmt.go (about) 1 // Copyright 2017 The Cockroach Authors. 2 // 3 // Licensed as a CockroachDB Enterprise file under the Cockroach Community 4 // License (the "License"); you may not use this file except in compliance with 5 // the License. You may obtain a copy of the License at 6 // 7 // https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt 8 9 package importccl 10 11 import ( 12 "context" 13 "fmt" 14 "io/ioutil" 15 "math" 16 "sort" 17 "strconv" 18 "strings" 19 20 "github.com/cockroachdb/cockroach/pkg/ccl/backupccl" 21 "github.com/cockroachdb/cockroach/pkg/clusterversion" 22 "github.com/cockroachdb/cockroach/pkg/jobs" 23 "github.com/cockroachdb/cockroach/pkg/jobs/jobspb" 24 "github.com/cockroachdb/cockroach/pkg/jobs/jobsprotectedts" 25 "github.com/cockroachdb/cockroach/pkg/keys" 26 "github.com/cockroachdb/cockroach/pkg/kv" 27 "github.com/cockroachdb/cockroach/pkg/kv/kvserver/protectedts" 28 "github.com/cockroachdb/cockroach/pkg/roachpb" 29 "github.com/cockroachdb/cockroach/pkg/server/telemetry" 30 "github.com/cockroachdb/cockroach/pkg/settings/cluster" 31 "github.com/cockroachdb/cockroach/pkg/sql" 32 "github.com/cockroachdb/cockroach/pkg/sql/catalog/catalogkv" 33 "github.com/cockroachdb/cockroach/pkg/sql/catalog/resolver" 34 "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb" 35 "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode" 36 "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror" 37 "github.com/cockroachdb/cockroach/pkg/sql/privilege" 38 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 39 "github.com/cockroachdb/cockroach/pkg/sql/sqlbase" 40 "github.com/cockroachdb/cockroach/pkg/storage/cloud" 41 "github.com/cockroachdb/cockroach/pkg/util" 42 "github.com/cockroachdb/cockroach/pkg/util/errorutil/unimplemented" 43 "github.com/cockroachdb/cockroach/pkg/util/hlc" 44 "github.com/cockroachdb/cockroach/pkg/util/humanizeutil" 45 "github.com/cockroachdb/cockroach/pkg/util/log" 46 "github.com/cockroachdb/cockroach/pkg/util/retry" 47 "github.com/cockroachdb/cockroach/pkg/util/timeutil" 48 "github.com/cockroachdb/cockroach/pkg/util/tracing" 49 "github.com/cockroachdb/cockroach/pkg/util/uuid" 50 "github.com/cockroachdb/errors" 51 ) 52 53 const ( 54 csvDelimiter = "delimiter" 55 csvComment = "comment" 56 csvNullIf = "nullif" 57 csvSkip = "skip" 58 csvStrictQuotes = "strict_quotes" 59 60 mysqlOutfileRowSep = "rows_terminated_by" 61 mysqlOutfileFieldSep = "fields_terminated_by" 62 mysqlOutfileEnclose = "fields_enclosed_by" 63 mysqlOutfileEscape = "fields_escaped_by" 64 65 importOptionSSTSize = "sstsize" 66 importOptionDecompress = "decompress" 67 importOptionOversample = "oversample" 68 importOptionSkipFKs = "skip_foreign_keys" 69 importOptionDisableGlobMatch = "disable_glob_matching" 70 importOptionSaveRejected = "experimental_save_rejected" 71 72 pgCopyDelimiter = "delimiter" 73 pgCopyNull = "nullif" 74 75 optMaxRowSize = "max_row_size" 76 77 // Turn on strict validation when importing avro records. 78 avroStrict = "strict_validation" 79 // Default input format is assumed to be OCF (object container file). 80 // This default can be changed by specified either of these options. 
81 avroBinRecords = "data_as_binary_records" 82 avroJSONRecords = "data_as_json_records" 83 // Record separator; default "\n" 84 avroRecordsSeparatedBy = "records_terminated_by" 85 // If we are importing avro records (binary or JSON), we must specify schema 86 // as either an inline JSON schema, or an external schema URI. 87 avroSchema = "schema" 88 avroSchemaURI = "schema_uri" 89 ) 90 91 var importOptionExpectValues = map[string]sql.KVStringOptValidate{ 92 csvDelimiter: sql.KVStringOptRequireValue, 93 csvComment: sql.KVStringOptRequireValue, 94 csvNullIf: sql.KVStringOptRequireValue, 95 csvSkip: sql.KVStringOptRequireValue, 96 csvStrictQuotes: sql.KVStringOptRequireNoValue, 97 98 mysqlOutfileRowSep: sql.KVStringOptRequireValue, 99 mysqlOutfileFieldSep: sql.KVStringOptRequireValue, 100 mysqlOutfileEnclose: sql.KVStringOptRequireValue, 101 mysqlOutfileEscape: sql.KVStringOptRequireValue, 102 103 importOptionSSTSize: sql.KVStringOptRequireValue, 104 importOptionDecompress: sql.KVStringOptRequireValue, 105 importOptionOversample: sql.KVStringOptRequireValue, 106 importOptionSaveRejected: sql.KVStringOptRequireNoValue, 107 108 importOptionSkipFKs: sql.KVStringOptRequireNoValue, 109 importOptionDisableGlobMatch: sql.KVStringOptRequireNoValue, 110 111 optMaxRowSize: sql.KVStringOptRequireValue, 112 113 avroStrict: sql.KVStringOptRequireNoValue, 114 avroSchema: sql.KVStringOptRequireValue, 115 avroSchemaURI: sql.KVStringOptRequireValue, 116 avroRecordsSeparatedBy: sql.KVStringOptRequireValue, 117 avroBinRecords: sql.KVStringOptRequireNoValue, 118 avroJSONRecords: sql.KVStringOptRequireNoValue, 119 } 120 121 func makeStringSet(opts ...string) map[string]struct{} { 122 res := make(map[string]struct{}, len(opts)) 123 for _, opt := range opts { 124 res[opt] = struct{}{} 125 } 126 return res 127 } 128 129 // Options common to all formats. 130 var allowedCommonOptions = makeStringSet( 131 importOptionSSTSize, importOptionDecompress, importOptionOversample, 132 importOptionSaveRejected, importOptionDisableGlobMatch) 133 134 // Format specific allowed options. 
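// validateFormatOptions (below) accepts an option for a given format if it
// appears either in that format's own set or in allowedCommonOptions. For
// example (hypothetical statement), `IMPORT ... CSV DATA (...) WITH
// nullif = '', decompress = 'gzip'` passes because nullif is CSV-specific
// and decompress is a common option, whereas `WITH rows_terminated_by = '\n'`
// would be rejected for the CSV format with an "invalid option" error.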
135 var avroAllowedOptions = makeStringSet( 136 avroStrict, avroBinRecords, avroJSONRecords, 137 avroRecordsSeparatedBy, avroSchema, avroSchemaURI, optMaxRowSize, 138 ) 139 var csvAllowedOptions = makeStringSet( 140 csvDelimiter, csvComment, csvNullIf, csvSkip, csvStrictQuotes, 141 ) 142 var mysqlOutAllowedOptions = makeStringSet( 143 mysqlOutfileRowSep, mysqlOutfileFieldSep, mysqlOutfileEnclose, 144 mysqlOutfileEscape, csvNullIf, csvSkip, 145 ) 146 var mysqlDumpAllowedOptions = makeStringSet(importOptionSkipFKs) 147 var pgCopyAllowedOptions = makeStringSet(pgCopyDelimiter, pgCopyNull, optMaxRowSize) 148 var pgDumpAllowedOptions = makeStringSet(optMaxRowSize, importOptionSkipFKs) 149 150 func validateFormatOptions( 151 format string, specified map[string]string, formatAllowed map[string]struct{}, 152 ) error { 153 for opt := range specified { 154 if _, ok := formatAllowed[opt]; !ok { 155 if _, ok = allowedCommonOptions[opt]; !ok { 156 return errors.Errorf( 157 "invalid option %q specified for %s import format", opt, format) 158 } 159 } 160 } 161 return nil 162 } 163 164 func importJobDescription( 165 p sql.PlanHookState, 166 orig *tree.Import, 167 defs tree.TableDefs, 168 files []string, 169 opts map[string]string, 170 ) (string, error) { 171 stmt := *orig 172 stmt.CreateFile = nil 173 stmt.CreateDefs = defs 174 stmt.Files = nil 175 for _, file := range files { 176 clean, err := cloud.SanitizeExternalStorageURI(file, nil /* extraParams */) 177 if err != nil { 178 return "", err 179 } 180 stmt.Files = append(stmt.Files, tree.NewDString(clean)) 181 } 182 stmt.Options = nil 183 for k, v := range opts { 184 opt := tree.KVOption{Key: tree.Name(k)} 185 val := importOptionExpectValues[k] == sql.KVStringOptRequireValue 186 val = val || (importOptionExpectValues[k] == sql.KVStringOptAny && len(v) > 0) 187 if val { 188 opt.Value = tree.NewDString(v) 189 } 190 stmt.Options = append(stmt.Options, opt) 191 } 192 sort.Slice(stmt.Options, func(i, j int) bool { return stmt.Options[i].Key < stmt.Options[j].Key }) 193 ann := p.ExtendedEvalContext().Annotations 194 return tree.AsStringWithFQNames(&stmt, ann), nil 195 } 196 197 // importPlanHook implements sql.PlanHookFn. 198 func importPlanHook( 199 ctx context.Context, stmt tree.Statement, p sql.PlanHookState, 200 ) (sql.PlanHookRowFn, sqlbase.ResultColumns, []sql.PlanNode, bool, error) { 201 importStmt, ok := stmt.(*tree.Import) 202 if !ok { 203 return nil, nil, nil, false, nil 204 } 205 telemetry.Count("import.total.attempted") 206 207 if !p.ExecCfg().Settings.Version.IsActive(ctx, clusterversion.VersionPartitionedBackup) { 208 return nil, nil, nil, false, errors.Errorf("IMPORT requires a cluster fully upgraded to version >= 19.2") 209 } 210 211 filesFn, err := p.TypeAsStringArray(ctx, importStmt.Files, "IMPORT") 212 if err != nil { 213 return nil, nil, nil, false, err 214 } 215 216 var createFileFn func() (string, error) 217 if !importStmt.Bundle && !importStmt.Into && importStmt.CreateDefs == nil { 218 createFileFn, err = p.TypeAsString(ctx, importStmt.CreateFile, "IMPORT") 219 if err != nil { 220 return nil, nil, nil, false, err 221 } 222 } 223 224 optsFn, err := p.TypeAsStringOpts(ctx, importStmt.Options, importOptionExpectValues) 225 if err != nil { 226 return nil, nil, nil, false, err 227 } 228 229 fn := func(ctx context.Context, _ []sql.PlanNode, resultsCh chan<- tree.Datums) error { 230 // TODO(dan): Move this span into sql. 
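	// At a high level, this closure checks that the user is an admin and
	// that IMPORT is not running inside an explicit transaction, expands any
	// glob patterns in the file URIs, resolves the target database (and, for
	// IMPORT INTO, the target table), translates the WITH options into a
	// roachpb.IOFileFormat, assembles the table descriptors being imported
	// into, and finally creates and runs an import job, adding a protected
	// timestamp record when importing into existing tables.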
231 ctx, span := tracing.ChildSpan(ctx, importStmt.StatementTag()) 232 defer tracing.FinishSpan(span) 233 234 walltime := p.ExecCfg().Clock.Now().WallTime 235 236 if err := p.RequireAdminRole(ctx, "IMPORT"); err != nil { 237 return err 238 } 239 240 if !p.ExtendedEvalContext().TxnImplicit { 241 return errors.Errorf("IMPORT cannot be used inside a transaction") 242 } 243 244 opts, err := optsFn() 245 if err != nil { 246 return err 247 } 248 249 filenamePatterns, err := filesFn() 250 if err != nil { 251 return err 252 } 253 var files []string 254 if _, ok := opts[importOptionDisableGlobMatch]; ok { 255 files = filenamePatterns 256 } else { 257 for _, file := range filenamePatterns { 258 if cloud.URINeedsGlobExpansion(file) { 259 s, err := p.ExecCfg().DistSQLSrv.ExternalStorageFromURI(ctx, file) 260 if err != nil { 261 return err 262 } 263 expandedFiles, err := s.ListFiles(ctx, "") 264 if err != nil { 265 return err 266 } 267 if len(expandedFiles) < 1 { 268 return errors.Errorf(`no files matched uri provided: '%s'`, file) 269 } 270 files = append(files, expandedFiles...) 271 } else { 272 files = append(files, file) 273 } 274 } 275 } 276 277 table := importStmt.Table 278 279 var parentID sqlbase.ID 280 if table != nil { 281 // TODO: As part of work for #34240, we should be operating on 282 // UnresolvedObjectNames here, rather than TableNames. 283 // We have a target table, so it might specify a DB in its name. 284 un := table.ToUnresolvedObjectName() 285 found, prefix, descI, err := tree.ResolveTarget(ctx, 286 un, p, p.SessionData().Database, p.SessionData().SearchPath) 287 if err != nil { 288 return pgerror.Wrap(err, pgcode.UndefinedTable, 289 "resolving target import name") 290 } 291 table.ObjectNamePrefix = prefix 292 if !found { 293 // Check if database exists right now. It might not after the import is done, 294 // but it's better to fail fast than wait until restore. 295 return pgerror.Newf(pgcode.UndefinedObject, 296 "database does not exist: %q", table) 297 } 298 dbDesc := descI.(*sqlbase.DatabaseDescriptor) 299 // If this is a non-INTO import that will thus be making a new table, we 300 // need the CREATE priv in the target DB. 301 if !importStmt.Into { 302 if err := p.CheckPrivilege(ctx, dbDesc, privilege.CREATE); err != nil { 303 return err 304 } 305 } 306 parentID = dbDesc.ID 307 } else { 308 // No target table means we're importing whatever we find into the session 309 // database, so it must exist. 310 dbDesc, err := p.ResolveUncachedDatabaseByName(ctx, p.SessionData().Database, true /*required*/) 311 if err != nil { 312 return pgerror.Wrap(err, pgcode.UndefinedObject, 313 "could not resolve current database") 314 } 315 // If this is a non-INTO import that will thus be making a new table, we 316 // need the CREATE priv in the target DB. 317 if !importStmt.Into { 318 if err := p.CheckPrivilege(ctx, dbDesc, privilege.CREATE); err != nil { 319 return err 320 } 321 } 322 parentID = dbDesc.ID 323 } 324 325 format := roachpb.IOFileFormat{} 326 switch importStmt.FileFormat { 327 case "CSV": 328 if err = validateFormatOptions(importStmt.FileFormat, opts, csvAllowedOptions); err != nil { 329 return err 330 } 331 telemetry.Count("import.format.csv") 332 format.Format = roachpb.IOFileFormat_CSV 333 // Set the default CSV separator for the cases when it is not overwritten. 
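	// (The comma set just below is that default; the delimiter, comment,
	// nullif, skip, and strict_quotes overrides that follow each replace one
	// field of format.Csv. For example, the hypothetical options
	// `WITH delimiter = e'\t', skip = '1', nullif = ''` yield tab-separated
	// input with the first line dropped and empty strings treated as SQL
	// NULL; GetSingleRune rejects delimiter or comment values longer than a
	// single character.)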
334 format.Csv.Comma = ',' 335 if override, ok := opts[csvDelimiter]; ok { 336 comma, err := util.GetSingleRune(override) 337 if err != nil { 338 return pgerror.Wrap(err, pgcode.Syntax, "invalid comma value") 339 } 340 format.Csv.Comma = comma 341 } 342 343 if override, ok := opts[csvComment]; ok { 344 comment, err := util.GetSingleRune(override) 345 if err != nil { 346 return pgerror.Wrap(err, pgcode.Syntax, "invalid comment value") 347 } 348 format.Csv.Comment = comment 349 } 350 351 if override, ok := opts[csvNullIf]; ok { 352 format.Csv.NullEncoding = &override 353 } 354 355 if override, ok := opts[csvSkip]; ok { 356 skip, err := strconv.Atoi(override) 357 if err != nil { 358 return pgerror.Wrapf(err, pgcode.Syntax, "invalid %s value", csvSkip) 359 } 360 if skip < 0 { 361 return pgerror.Newf(pgcode.Syntax, "%s must be >= 0", csvSkip) 362 } 363 format.Csv.Skip = uint32(skip) 364 } 365 if _, ok := opts[csvStrictQuotes]; ok { 366 format.Csv.StrictQuotes = true 367 } 368 if _, ok := opts[importOptionSaveRejected]; ok { 369 format.SaveRejected = true 370 } 371 case "DELIMITED": 372 if err = validateFormatOptions(importStmt.FileFormat, opts, mysqlOutAllowedOptions); err != nil { 373 return err 374 } 375 telemetry.Count("import.format.mysqlout") 376 format.Format = roachpb.IOFileFormat_MysqlOutfile 377 format.MysqlOut = roachpb.MySQLOutfileOptions{ 378 RowSeparator: '\n', 379 FieldSeparator: '\t', 380 } 381 if override, ok := opts[mysqlOutfileRowSep]; ok { 382 c, err := util.GetSingleRune(override) 383 if err != nil { 384 return pgerror.Wrapf(err, pgcode.Syntax, 385 "invalid %q value", mysqlOutfileRowSep) 386 } 387 format.MysqlOut.RowSeparator = c 388 } 389 390 if override, ok := opts[mysqlOutfileFieldSep]; ok { 391 c, err := util.GetSingleRune(override) 392 if err != nil { 393 return pgerror.Wrapf(err, pgcode.Syntax, "invalid %q value", mysqlOutfileFieldSep) 394 } 395 format.MysqlOut.FieldSeparator = c 396 } 397 398 if override, ok := opts[mysqlOutfileEnclose]; ok { 399 c, err := util.GetSingleRune(override) 400 if err != nil { 401 return pgerror.Wrapf(err, pgcode.Syntax, "invalid %q value", mysqlOutfileRowSep) 402 } 403 format.MysqlOut.Enclose = roachpb.MySQLOutfileOptions_Always 404 format.MysqlOut.Encloser = c 405 } 406 407 if override, ok := opts[mysqlOutfileEscape]; ok { 408 c, err := util.GetSingleRune(override) 409 if err != nil { 410 return pgerror.Wrapf(err, pgcode.Syntax, "invalid %q value", mysqlOutfileRowSep) 411 } 412 format.MysqlOut.HasEscape = true 413 format.MysqlOut.Escape = c 414 } 415 if override, ok := opts[csvSkip]; ok { 416 skip, err := strconv.Atoi(override) 417 if err != nil { 418 return pgerror.Wrapf(err, pgcode.Syntax, "invalid %s value", csvSkip) 419 } 420 if skip < 0 { 421 return pgerror.Newf(pgcode.Syntax, "%s must be >= 0", csvSkip) 422 } 423 format.MysqlOut.Skip = uint32(skip) 424 } 425 if override, ok := opts[csvNullIf]; ok { 426 format.MysqlOut.NullEncoding = &override 427 } 428 if _, ok := opts[importOptionSaveRejected]; ok { 429 format.SaveRejected = true 430 } 431 case "MYSQLDUMP": 432 if err = validateFormatOptions(importStmt.FileFormat, opts, mysqlDumpAllowedOptions); err != nil { 433 return err 434 } 435 telemetry.Count("import.format.mysqldump") 436 format.Format = roachpb.IOFileFormat_Mysqldump 437 case "PGCOPY": 438 if err = validateFormatOptions(importStmt.FileFormat, opts, pgCopyAllowedOptions); err != nil { 439 return err 440 } 441 telemetry.Count("import.format.pgcopy") 442 format.Format = roachpb.IOFileFormat_PgCopy 443 format.PgCopy = 
roachpb.PgCopyOptions{ 444 Delimiter: '\t', 445 Null: `\N`, 446 } 447 if override, ok := opts[pgCopyDelimiter]; ok { 448 c, err := util.GetSingleRune(override) 449 if err != nil { 450 return pgerror.Wrapf(err, pgcode.Syntax, "invalid %q value", pgCopyDelimiter) 451 } 452 format.PgCopy.Delimiter = c 453 } 454 if override, ok := opts[pgCopyNull]; ok { 455 format.PgCopy.Null = override 456 } 457 maxRowSize := int32(defaultScanBuffer) 458 if override, ok := opts[optMaxRowSize]; ok { 459 sz, err := humanizeutil.ParseBytes(override) 460 if err != nil { 461 return err 462 } 463 if sz < 1 || sz > math.MaxInt32 { 464 return errors.Errorf("%d out of range: %d", maxRowSize, sz) 465 } 466 maxRowSize = int32(sz) 467 } 468 format.PgCopy.MaxRowSize = maxRowSize 469 case "PGDUMP": 470 if err = validateFormatOptions(importStmt.FileFormat, opts, pgDumpAllowedOptions); err != nil { 471 return err 472 } 473 telemetry.Count("import.format.pgdump") 474 format.Format = roachpb.IOFileFormat_PgDump 475 maxRowSize := int32(defaultScanBuffer) 476 if override, ok := opts[optMaxRowSize]; ok { 477 sz, err := humanizeutil.ParseBytes(override) 478 if err != nil { 479 return err 480 } 481 if sz < 1 || sz > math.MaxInt32 { 482 return errors.Errorf("%d out of range: %d", maxRowSize, sz) 483 } 484 maxRowSize = int32(sz) 485 } 486 format.PgDump.MaxRowSize = maxRowSize 487 case "AVRO": 488 if err = validateFormatOptions(importStmt.FileFormat, opts, avroAllowedOptions); err != nil { 489 return err 490 } 491 err := parseAvroOptions(ctx, opts, p, &format) 492 if err != nil { 493 return err 494 } 495 default: 496 return unimplemented.Newf("import.format", "unsupported import format: %q", importStmt.FileFormat) 497 } 498 499 // sstSize, if 0, will be set to an appropriate default by the specific 500 // implementation (local or distributed) since each has different optimal 501 // settings. 502 var sstSize int64 503 if override, ok := opts[importOptionSSTSize]; ok { 504 sz, err := humanizeutil.ParseBytes(override) 505 if err != nil { 506 return err 507 } 508 sstSize = sz 509 } 510 var oversample int64 511 if override, ok := opts[importOptionOversample]; ok { 512 os, err := strconv.ParseInt(override, 10, 64) 513 if err != nil { 514 return err 515 } 516 oversample = os 517 } 518 519 var skipFKs bool 520 if _, ok := opts[importOptionSkipFKs]; ok { 521 skipFKs = true 522 } 523 524 if override, ok := opts[importOptionDecompress]; ok { 525 found := false 526 for name, value := range roachpb.IOFileFormat_Compression_value { 527 if strings.EqualFold(name, override) { 528 format.Compression = roachpb.IOFileFormat_Compression(value) 529 found = true 530 break 531 } 532 } 533 if !found { 534 return unimplemented.Newf("import.compression", "unsupported compression value: %q", override) 535 } 536 } 537 538 var tableDetails []jobspb.ImportDetails_Table 539 jobDesc, err := importJobDescription(p, importStmt, nil, filenamePatterns, opts) 540 if err != nil { 541 return err 542 } 543 544 if importStmt.Into { 545 // TODO(dt): this is a prototype for incremental import but there are many 546 // TODOs remaining before it is ready to graduate to prime-time. Some of 547 // them are captured in specific TODOs below, but some of the big, scary 548 // things to do are: 549 // - review planner vs txn use very carefully. We should try to get to a 550 // single txn used to plan the job and create it. 
Using the planner's 551 // txn today is very wrong since it will not commit until after the job 552 // has run, so starting a job based on reads it returned is very wrong. 553 // - audit every place that we resolve/lease/read table descs to be sure 554 // that the IMPORTING state is handled correctly. SQL lease acquisition 555 // is probably the easy one here since it has single read path -- the 556 // things that read directly like the queues or background jobs are the 557 // ones we'll need to really carefully look though. 558 // - Look at if/how cleanup/rollback works. Reconsider the cpu from the 559 // desc version (perhaps we should be re-reading instead?). 560 // - Write _a lot_ of tests. 561 found, err := p.ResolveMutableTableDescriptor(ctx, table, true, resolver.ResolveRequireTableDesc) 562 if err != nil { 563 return err 564 } 565 566 // TODO(dt): checking *CREATE* on an *existing table* is weird. 567 if err := p.CheckPrivilege(ctx, found, privilege.CREATE); err != nil { 568 return err 569 } 570 571 // IMPORT INTO does not currently support interleaved tables. 572 if found.IsInterleaved() { 573 // TODO(miretskiy): Handle import into when tables are interleaved. 574 return pgerror.New(pgcode.FeatureNotSupported, "Cannot use IMPORT INTO with interleaved tables") 575 } 576 577 // Validate target columns. 578 var intoCols []string 579 var isTargetCol = make(map[string]bool) 580 for _, name := range importStmt.IntoCols { 581 active, err := found.FindActiveColumnsByNames(tree.NameList{name}) 582 if err != nil { 583 return errors.Wrap(err, "verifying target columns") 584 } 585 586 isTargetCol[active[0].Name] = true 587 intoCols = append(intoCols, active[0].Name) 588 } 589 590 // IMPORT INTO does not support columns with DEFAULT expressions. Ensure 591 // that all non-target columns are nullable until we support DEFAULT 592 // expressions. 
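	// For example (hypothetical schema), with
	// CREATE TABLE t (a INT PRIMARY KEY, b INT, c INT NOT NULL),
	// `IMPORT INTO t (a, b) CSV DATA (...)` is rejected because the
	// non-target column c is NOT NULL, while
	// `IMPORT INTO t (a, b, c) CSV DATA (...)` passes this check.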
593 for _, col := range found.VisibleColumns() { 594 if col.HasDefault() { 595 return errors.Errorf("cannot IMPORT INTO a table with a DEFAULT expression for any of its columns") 596 } 597 598 if len(isTargetCol) != 0 && !isTargetCol[col.Name] && !col.IsNullable() { 599 return errors.Errorf("all non-target columns in IMPORT INTO must be nullable") 600 } 601 } 602 603 tableDetails = []jobspb.ImportDetails_Table{{Desc: &found.TableDescriptor, IsNew: false, TargetCols: intoCols}} 604 } else { 605 var tableDescs []*sqlbase.TableDescriptor 606 seqVals := make(map[sqlbase.ID]int64) 607 608 if importStmt.Bundle { 609 store, err := p.ExecCfg().DistSQLSrv.ExternalStorageFromURI(ctx, files[0]) 610 if err != nil { 611 return err 612 } 613 defer store.Close() 614 615 raw, err := store.ReadFile(ctx, "") 616 if err != nil { 617 return err 618 } 619 defer raw.Close() 620 reader, err := decompressingReader(raw, files[0], format.Compression) 621 if err != nil { 622 return err 623 } 624 defer reader.Close() 625 626 var match string 627 if table != nil { 628 match = table.ObjectName.String() 629 } 630 631 fks := fkHandler{skip: skipFKs, allowed: true, resolver: make(fkResolver)} 632 switch format.Format { 633 case roachpb.IOFileFormat_Mysqldump: 634 evalCtx := &p.ExtendedEvalContext().EvalContext 635 tableDescs, err = readMysqlCreateTable(ctx, reader, evalCtx, p, defaultCSVTableID, parentID, match, fks, seqVals) 636 case roachpb.IOFileFormat_PgDump: 637 evalCtx := &p.ExtendedEvalContext().EvalContext 638 tableDescs, err = readPostgresCreateTable(ctx, reader, evalCtx, p, match, parentID, walltime, fks, int(format.PgDump.MaxRowSize)) 639 default: 640 return errors.Errorf("non-bundle format %q does not support reading schemas", format.Format.String()) 641 } 642 if err != nil { 643 return err 644 } 645 if tableDescs == nil && table != nil { 646 return errors.Errorf("table definition not found for %q", table.ObjectName.String()) 647 } 648 } else { 649 if table == nil { 650 return errors.Errorf("non-bundle format %q should always have a table name", importStmt.FileFormat) 651 } 652 var create *tree.CreateTable 653 if importStmt.CreateDefs != nil { 654 create = &tree.CreateTable{ 655 Table: *importStmt.Table, 656 Defs: importStmt.CreateDefs, 657 } 658 } else { 659 filename, err := createFileFn() 660 if err != nil { 661 return err 662 } 663 create, err = readCreateTableFromStore(ctx, filename, p.ExecCfg().DistSQLSrv.ExternalStorageFromURI) 664 if err != nil { 665 return err 666 } 667 668 if table.ObjectName != create.Table.ObjectName { 669 return errors.Errorf( 670 "importing table %s, but file specifies a schema for table %s", 671 table.ObjectName, create.Table.ObjectName, 672 ) 673 } 674 } 675 676 tbl, err := MakeSimpleTableDescriptor( 677 ctx, p.ExecCfg().Settings, create, parentID, defaultCSVTableID, NoFKs, walltime) 678 if err != nil { 679 return err 680 } 681 tableDescs = []*sqlbase.TableDescriptor{tbl.TableDesc()} 682 descStr, err := importJobDescription(p, importStmt, create.Defs, filenamePatterns, opts) 683 if err != nil { 684 return err 685 } 686 jobDesc = descStr 687 } 688 689 tableDetails = make([]jobspb.ImportDetails_Table, len(tableDescs)) 690 for i := range tableDescs { 691 tableDetails[i] = jobspb.ImportDetails_Table{Desc: tableDescs[i], SeqVal: seqVals[tableDescs[i].ID], IsNew: true} 692 } 693 } 694 695 telemetry.CountBucketed("import.files", int64(len(files))) 696 697 // Here we create the job and protected timestamp records in a side 698 // transaction and then kick off the job. This is awful. 
Rather we should be 699 // disallowing this statement in an explicit transaction and then we should 700 // create the job in the user's transaction here and then in a post-commit 701 // hook we should kick of the StartableJob which we attached to the 702 // connExecutor somehow. 703 704 importDetails := jobspb.ImportDetails{ 705 URIs: files, 706 Format: format, 707 ParentID: parentID, 708 Tables: tableDetails, 709 SSTSize: sstSize, 710 Oversample: oversample, 711 SkipFKs: skipFKs, 712 } 713 714 // Prepare the protected timestamp record. 715 var spansToProtect []roachpb.Span 716 codec := p.(sql.PlanHookState).ExecCfg().Codec 717 for i := range tableDetails { 718 if td := &tableDetails[i]; !td.IsNew { 719 spansToProtect = append(spansToProtect, td.Desc.TableSpan(codec)) 720 } 721 } 722 if len(spansToProtect) > 0 { 723 protectedtsID := uuid.MakeV4() 724 importDetails.ProtectedTimestampRecord = &protectedtsID 725 } 726 jr := jobs.Record{ 727 Description: jobDesc, 728 Username: p.User(), 729 Details: importDetails, 730 Progress: jobspb.ImportProgress{}, 731 } 732 733 var sj *jobs.StartableJob 734 if err := p.ExecCfg().DB.Txn(ctx, func(ctx context.Context, txn *kv.Txn) (err error) { 735 sj, err = p.ExecCfg().JobRegistry.CreateStartableJobWithTxn(ctx, jr, txn, resultsCh) 736 if err != nil { 737 return err 738 } 739 740 if len(spansToProtect) > 0 { 741 // NB: We protect the timestamp preceding the import statement timestamp 742 // because that's the timestamp to which we want to revert. 743 tsToProtect := hlc.Timestamp{WallTime: walltime}.Prev() 744 rec := jobsprotectedts.MakeRecord(*importDetails.ProtectedTimestampRecord, 745 *sj.ID(), tsToProtect, spansToProtect) 746 return p.ExecCfg().ProtectedTimestampProvider.Protect(ctx, txn, rec) 747 } 748 return nil 749 }); err != nil { 750 if sj != nil { 751 if cleanupErr := sj.CleanupOnRollback(ctx); cleanupErr != nil { 752 log.Warningf(ctx, "failed to cleanup StartableJob: %v", cleanupErr) 753 } 754 } 755 return err 756 } 757 return sj.Run(ctx) 758 } 759 return fn, backupccl.RestoreHeader, nil, false, nil 760 } 761 762 func parseAvroOptions( 763 ctx context.Context, opts map[string]string, p sql.PlanHookState, format *roachpb.IOFileFormat, 764 ) error { 765 telemetry.Count("import.format.avro") 766 format.Format = roachpb.IOFileFormat_Avro 767 768 // Default input format is OCF. 769 format.Avro.Format = roachpb.AvroOptions_OCF 770 _, format.Avro.StrictMode = opts[avroStrict] 771 772 _, haveBinRecs := opts[avroBinRecords] 773 _, haveJSONRecs := opts[avroJSONRecords] 774 775 if haveBinRecs && haveJSONRecs { 776 return errors.Errorf("only one of the %s or %s options can be set", avroBinRecords, avroJSONRecords) 777 } 778 779 if haveBinRecs || haveJSONRecs { 780 // Input is a "records" format. 781 if haveBinRecs { 782 format.Avro.Format = roachpb.AvroOptions_BIN_RECORDS 783 } else { 784 format.Avro.Format = roachpb.AvroOptions_JSON_RECORDS 785 } 786 787 // Set record separator. 788 format.Avro.RecordSeparator = '\n' 789 if override, ok := opts[avroRecordsSeparatedBy]; ok { 790 c, err := util.GetSingleRune(override) 791 if err != nil { 792 return pgerror.Wrapf(err, pgcode.Syntax, 793 "invalid %q value", avroRecordsSeparatedBy) 794 } 795 format.Avro.RecordSeparator = c 796 } 797 798 // See if inline schema is specified. 799 format.Avro.SchemaJSON = opts[avroSchema] 800 801 if len(format.Avro.SchemaJSON) == 0 { 802 // Inline schema not set; We must have external schema. 
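	// A statement along the lines of (hypothetical URIs)
	// `IMPORT TABLE t ... AVRO DATA ('nodelocal://1/data.json')
	//  WITH data_as_json_records, schema_uri = 'nodelocal://1/schema.json'`
	// reaches this branch; the referenced schema document is fetched below
	// via ExternalStorageFromURI and stored in format.Avro.SchemaJSON.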
803 uri, ok := opts[avroSchemaURI] 804 if !ok { 805 return errors.Errorf( 806 "either %s or %s option must be set when importing avro record files", avroSchema, avroSchemaURI) 807 } 808 809 store, err := p.ExecCfg().DistSQLSrv.ExternalStorageFromURI(ctx, uri) 810 if err != nil { 811 return err 812 } 813 defer store.Close() 814 815 raw, err := store.ReadFile(ctx, "") 816 if err != nil { 817 return err 818 } 819 defer raw.Close() 820 schemaBytes, err := ioutil.ReadAll(raw) 821 if err != nil { 822 return err 823 } 824 format.Avro.SchemaJSON = string(schemaBytes) 825 } 826 827 if override, ok := opts[optMaxRowSize]; ok { 828 sz, err := humanizeutil.ParseBytes(override) 829 if err != nil { 830 return err 831 } 832 if sz < 1 || sz > math.MaxInt32 { 833 return errors.Errorf("%s out of range: %d", override, sz) 834 } 835 format.Avro.MaxRecordSize = int32(sz) 836 } 837 } 838 return nil 839 } 840 841 type importResumer struct { 842 job *jobs.Job 843 settings *cluster.Settings 844 res backupccl.RowCount 845 846 testingKnobs struct { 847 afterImport func(summary backupccl.RowCount) error 848 alwaysFlushJobProgress bool 849 ignoreProtectedTimestamps bool 850 } 851 } 852 853 // Prepares descriptors for newly created tables being imported into. 854 func prepareNewTableDescsForIngestion( 855 ctx context.Context, 856 txn *kv.Txn, 857 p sql.PlanHookState, 858 tables []jobspb.ImportDetails_Table, 859 parentID sqlbase.ID, 860 ) ([]*sqlbase.TableDescriptor, error) { 861 var tableDescs []*sqlbase.TableDescriptor 862 for _, i := range tables { 863 if err := backupccl.CheckTableExists(ctx, txn, p.ExecCfg().Codec, parentID, i.Desc.Name); err != nil { 864 return nil, err 865 } 866 tableDescs = append(tableDescs, i.Desc) 867 } 868 869 // Verification steps have passed, generate a new table ID if we're 870 // restoring. We do this last because we want to avoid calling 871 // GenerateUniqueDescID if there's any kind of error above. 872 // Reserving a table ID now means we can avoid the rekey work during restore. 873 tableRewrites := make(backupccl.TableRewriteMap) 874 seqVals := make(map[sqlbase.ID]int64, len(tables)) 875 for _, tableDesc := range tables { 876 id, err := catalogkv.GenerateUniqueDescID(ctx, p.ExecCfg().DB, p.ExecCfg().Codec) 877 if err != nil { 878 return nil, err 879 } 880 tableRewrites[tableDesc.Desc.ID] = &jobspb.RestoreDetails_TableRewrite{ 881 TableID: id, 882 ParentID: parentID, 883 } 884 seqVals[id] = tableDesc.SeqVal 885 } 886 887 if err := backupccl.RewriteTableDescs(tableDescs, tableRewrites, ""); err != nil { 888 return nil, err 889 } 890 891 for i := range tableDescs { 892 tableDescs[i].State = sqlbase.TableDescriptor_OFFLINE 893 tableDescs[i].OfflineReason = "importing" 894 895 } 896 897 var seqValKVs []roachpb.KeyValue 898 for i := range tableDescs { 899 if v, ok := seqVals[tableDescs[i].ID]; ok && v != 0 { 900 key, val, err := sql.MakeSequenceKeyVal(p.ExecCfg().Codec, tableDescs[i], v, false) 901 if err != nil { 902 return nil, err 903 } 904 kv := roachpb.KeyValue{Key: key} 905 kv.Value.SetInt(val) 906 seqValKVs = append(seqValKVs, kv) 907 } 908 } 909 910 // Write the new TableDescriptors and flip the namespace entries over to 911 // them. After this call, any queries on a table will be served by the newly 912 // imported data. 
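	// (The descriptors written here carry the OFFLINE state set above, so
	// the new tables are not actually queryable until publishTables flips
	// them to PUBLIC once ingestion succeeds.)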
913 if err := backupccl.WriteTableDescs(ctx, txn, nil /* databases */, tableDescs, tree.RequestedDescriptors, p.ExecCfg().Settings, seqValKVs); err != nil { 914 return nil, errors.Wrapf(err, "creating tables") 915 } 916 917 return tableDescs, nil 918 } 919 920 // Prepares descriptors for existing tables being imported into. 921 func prepareExistingTableDescForIngestion( 922 ctx context.Context, txn *kv.Txn, execCfg *sql.ExecutorConfig, desc *sqlbase.TableDescriptor, 923 ) (*sqlbase.TableDescriptor, error) { 924 if len(desc.Mutations) > 0 { 925 return nil, errors.Errorf("cannot IMPORT INTO a table with schema changes in progress -- try again later (pending mutation %s)", desc.Mutations[0].String()) 926 } 927 928 // TODO(dt): Ensure no other schema changes can start during ingest. 929 importing := *desc 930 importing.Version++ 931 // Take the table offline for import. 932 // TODO(dt): audit everywhere we get table descs (leases or otherwise) to 933 // ensure that filtering by state handles IMPORTING correctly. 934 importing.State = sqlbase.TableDescriptor_OFFLINE 935 importing.OfflineReason = "importing" 936 // TODO(dt): de-validate all the FKs. 937 938 if err := txn.SetSystemConfigTrigger(); err != nil { 939 return nil, err 940 } 941 942 // Note that this CPut is safe with respect to mixed-version descriptor 943 // upgrade and downgrade, because IMPORT does not operate in mixed-version 944 // states. 945 // TODO(jordan,lucy): remove this comment once 19.2 is released. 946 existingDesc, err := sqlbase.ConditionalGetTableDescFromTxn(ctx, txn, execCfg.Codec, desc) 947 if err != nil { 948 return nil, errors.Wrap(err, "another operation is currently operating on the table") 949 } 950 err = txn.CPut(ctx, 951 sqlbase.MakeDescMetadataKey(keys.SystemSQLCodec, desc.ID), 952 sqlbase.WrapDescriptor(&importing), 953 existingDesc) 954 if err != nil { 955 return nil, errors.Wrap(err, "another operation is currently operating on the table") 956 } 957 958 return &importing, nil 959 // NB: we need to wait for the schema change to show up before it is safe 960 // to ingest, but rather than do that here, we'll wait for this schema 961 // change in the job's Resume hook, before running the ingest phase. That 962 // will hopefully let it get a head start on propagating, plus the more we 963 // do in the job, the more that has automatic cleanup on rollback. 964 } 965 966 // prepareTableDescsForIngestion prepares table descriptors for the ingestion 967 // step of import. The descriptors are in an IMPORTING state (offline) on 968 // successful completion of this method. 
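// New tables are given fresh descriptor IDs and written in the OFFLINE
// state; existing IMPORT INTO targets have their version bumped and are
// likewise moved to OFFLINE. Both are returned to PUBLIC by publishTables on
// success, or handled by dropTables on failure or cancellation.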
969 func (r *importResumer) prepareTableDescsForIngestion( 970 ctx context.Context, p sql.PlanHookState, details jobspb.ImportDetails, 971 ) error { 972 err := p.ExecCfg().DB.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error { 973 974 importDetails := details 975 importDetails.Tables = make([]jobspb.ImportDetails_Table, len(details.Tables)) 976 977 newTableDescToIdx := make(map[*sqlbase.TableDescriptor]int, len(importDetails.Tables)) 978 var hasExistingTables bool 979 var err error 980 var newTableDescs []jobspb.ImportDetails_Table 981 var desc *sqlbase.TableDescriptor 982 for i, table := range details.Tables { 983 if !table.IsNew { 984 desc, err = prepareExistingTableDescForIngestion(ctx, txn, p.ExecCfg(), table.Desc) 985 if err != nil { 986 return err 987 } 988 importDetails.Tables[i] = jobspb.ImportDetails_Table{Desc: desc, Name: table.Name, 989 SeqVal: table.SeqVal, 990 IsNew: table.IsNew, 991 TargetCols: table.TargetCols} 992 993 hasExistingTables = true 994 } else { 995 newTableDescToIdx[table.Desc] = i 996 newTableDescs = append(newTableDescs, table) 997 } 998 } 999 1000 // Prepare the table descriptors for newly created tables being imported 1001 // into. 1002 // 1003 // TODO(adityamaru): This is still unnecessarily complicated. If we can get 1004 // the new table desc preparation to work on a per desc basis, rather than 1005 // requiring all the newly created descriptors, then this can look like the 1006 // call to prepareExistingTableDescForIngestion. Currently, FK references 1007 // misbehave when I tried to write the desc one at a time. 1008 if len(newTableDescs) != 0 { 1009 res, err := prepareNewTableDescsForIngestion(ctx, txn, p, newTableDescs, importDetails.ParentID) 1010 if err != nil { 1011 return err 1012 } 1013 for i, table := range res { 1014 importDetails.Tables[i] = jobspb.ImportDetails_Table{Desc: table, 1015 Name: details.Tables[i].Name, 1016 SeqVal: details.Tables[i].SeqVal, 1017 IsNew: details.Tables[i].IsNew, 1018 TargetCols: details.Tables[i].TargetCols} 1019 } 1020 } 1021 1022 importDetails.PrepareComplete = true 1023 1024 // If we do not have pending schema changes on existing descriptors we can 1025 // choose our Walltime (to IMPORT from) immediately. Otherwise, we have to 1026 // wait for all nodes to see the same descriptor version before doing so. 1027 if !hasExistingTables { 1028 importDetails.Walltime = p.ExecCfg().Clock.Now().WallTime 1029 } else { 1030 importDetails.Walltime = 0 1031 } 1032 1033 // Update the job once all descs have been prepared for ingestion. 1034 err = r.job.WithTxn(txn).SetDetails(ctx, importDetails) 1035 1036 return err 1037 }) 1038 return err 1039 } 1040 1041 // Resume is part of the jobs.Resumer interface. 1042 func (r *importResumer) Resume( 1043 ctx context.Context, phs interface{}, resultsCh chan<- tree.Datums, 1044 ) error { 1045 telemetry.Count("import.total.started") 1046 1047 details := r.job.Details().(jobspb.ImportDetails) 1048 p := phs.(sql.PlanHookState) 1049 ptsID := details.ProtectedTimestampRecord 1050 if ptsID != nil && !r.testingKnobs.ignoreProtectedTimestamps { 1051 if err := p.ExecCfg().ProtectedTimestampProvider.Verify(ctx, *ptsID); err != nil { 1052 if errors.Is(err, protectedts.ErrNotExists) { 1053 // No reason to return an error which might cause problems if it doesn't 1054 // seem to exist. 
1055 log.Warningf(ctx, "failed to release protected which seems not to exist: %v", err) 1056 } else { 1057 return err 1058 } 1059 } 1060 } 1061 1062 tables := make(map[string]*execinfrapb.ReadImportDataSpec_ImportTable, len(details.Tables)) 1063 if details.Tables != nil { 1064 // Skip prepare stage on job resumption, if it has already been completed. 1065 if !details.PrepareComplete { 1066 if err := r.prepareTableDescsForIngestion(ctx, p, details); err != nil { 1067 return err 1068 } 1069 1070 // Re-initialize details after prepare step. 1071 details = r.job.Details().(jobspb.ImportDetails) 1072 } 1073 1074 for _, i := range details.Tables { 1075 if i.Name != "" { 1076 tables[i.Name] = &execinfrapb.ReadImportDataSpec_ImportTable{Desc: i.Desc, TargetCols: i.TargetCols} 1077 } else if i.Desc != nil { 1078 tables[i.Desc.Name] = &execinfrapb.ReadImportDataSpec_ImportTable{Desc: i.Desc, TargetCols: i.TargetCols} 1079 } else { 1080 return errors.Errorf("invalid table specification") 1081 } 1082 } 1083 } 1084 1085 // In the case of importing into existing tables we must wait for all nodes 1086 // to see the same version of the updated table descriptor, after which we 1087 // shall chose a ts to import from. 1088 if details.Walltime == 0 { 1089 // TODO(dt): update job status to mention waiting for tables to go offline. 1090 for _, i := range details.Tables { 1091 if _, err := p.ExecCfg().LeaseManager.WaitForOneVersion(ctx, i.Desc.ID, retry.Options{}); err != nil { 1092 return err 1093 } 1094 } 1095 1096 details.Walltime = p.ExecCfg().Clock.Now().WallTime 1097 if err := r.job.WithTxn(nil).SetDetails(ctx, details); err != nil { 1098 return err 1099 } 1100 } 1101 1102 walltime := details.Walltime 1103 files := details.URIs 1104 format := details.Format 1105 1106 res, err := sql.DistIngest(ctx, p, r.job, tables, files, format, walltime, r.testingKnobs.alwaysFlushJobProgress) 1107 if err != nil { 1108 return err 1109 } 1110 pkIDs := make(map[uint64]struct{}, len(details.Tables)) 1111 for _, t := range details.Tables { 1112 pkIDs[roachpb.BulkOpSummaryID(uint64(t.Desc.ID), uint64(t.Desc.PrimaryIndex.ID))] = struct{}{} 1113 } 1114 r.res.DataSize = res.DataSize 1115 for id, count := range res.EntryCounts { 1116 if _, ok := pkIDs[id]; ok { 1117 r.res.Rows += count 1118 } else { 1119 r.res.IndexEntries += count 1120 } 1121 } 1122 if r.testingKnobs.afterImport != nil { 1123 if err := r.testingKnobs.afterImport(r.res); err != nil { 1124 return err 1125 } 1126 } 1127 1128 if err := r.publishTables(ctx, p.ExecCfg()); err != nil { 1129 return err 1130 } 1131 // TODO(ajwerner): Should this actually return the error? At this point we've 1132 // successfully finished the import but failed to drop the protected 1133 // timestamp. The reconciliation loop ought to pick it up. 
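	// As written, releasing the record below is best-effort: the tables have
	// already been published, so a failure here is only logged and the
	// record lingers until it is cleaned up out of band.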
1134 if ptsID != nil && !r.testingKnobs.ignoreProtectedTimestamps { 1135 if err := p.ExecCfg().DB.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error { 1136 return r.releaseProtectedTimestamp(ctx, txn, p.ExecCfg().ProtectedTimestampProvider) 1137 }); err != nil { 1138 log.Errorf(ctx, "failed to release protected timestamp: %v", err) 1139 } 1140 } 1141 1142 telemetry.Count("import.total.succeeded") 1143 telemetry.CountBucketed("import.rows", r.res.Rows) 1144 const mb = 1 << 20 1145 sizeMb := r.res.DataSize / mb 1146 telemetry.CountBucketed("import.size-mb", sizeMb) 1147 1148 sec := int64(timeutil.Since(timeutil.FromUnixMicros(r.job.Payload().StartedMicros)).Seconds()) 1149 var mbps int64 1150 if sec > 0 { 1151 mbps = mb / sec 1152 } 1153 telemetry.CountBucketed("import.duration-sec.succeeded", sec) 1154 telemetry.CountBucketed("import.speed-mbps", mbps) 1155 // Tiny imports may skew throughput numbers due to overhead. 1156 if sizeMb > 10 { 1157 telemetry.CountBucketed("import.speed-mbps.over10mb", mbps) 1158 } 1159 1160 resultsCh <- tree.Datums{ 1161 tree.NewDInt(tree.DInt(*r.job.ID())), 1162 tree.NewDString(string(jobs.StatusSucceeded)), 1163 tree.NewDFloat(tree.DFloat(1.0)), 1164 tree.NewDInt(tree.DInt(r.res.Rows)), 1165 tree.NewDInt(tree.DInt(r.res.IndexEntries)), 1166 tree.NewDInt(tree.DInt(r.res.DataSize)), 1167 } 1168 1169 return nil 1170 } 1171 1172 // publishTables updates the status of imported tables from OFFLINE to PUBLIC. 1173 func (r *importResumer) publishTables(ctx context.Context, execCfg *sql.ExecutorConfig) error { 1174 details := r.job.Details().(jobspb.ImportDetails) 1175 // Tables should only be published once. 1176 if details.TablesPublished { 1177 return nil 1178 } 1179 log.Event(ctx, "making tables live") 1180 1181 // Needed to trigger the schema change manager. 1182 err := execCfg.DB.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error { 1183 if err := txn.SetSystemConfigTrigger(); err != nil { 1184 return err 1185 } 1186 b := txn.NewBatch() 1187 for _, tbl := range details.Tables { 1188 tableDesc := *tbl.Desc 1189 tableDesc.Version++ 1190 tableDesc.State = sqlbase.TableDescriptor_PUBLIC 1191 1192 if !tbl.IsNew { 1193 // NB: This is not using AllNonDropIndexes or directly mutating the 1194 // constraints returned by the other usual helpers because we need to 1195 // replace the `OutboundFKs` and `Checks` slices of tableDesc with copies 1196 // that we can mutate. We need to do that because tableDesc is a shallow 1197 // copy of tbl.Desc that we'll be asserting is the current version when we 1198 // CPut below. 1199 // 1200 // Set FK constraints to unvalidated before publishing the table imported 1201 // into. 1202 tableDesc.OutboundFKs = make([]sqlbase.ForeignKeyConstraint, len(tableDesc.OutboundFKs)) 1203 copy(tableDesc.OutboundFKs, tbl.Desc.OutboundFKs) 1204 for i := range tableDesc.OutboundFKs { 1205 tableDesc.OutboundFKs[i].Validity = sqlbase.ConstraintValidity_Unvalidated 1206 } 1207 1208 // Set CHECK constraints to unvalidated before publishing the table imported into. 1209 tableDesc.Checks = make([]*sqlbase.TableDescriptor_CheckConstraint, len(tbl.Desc.Checks)) 1210 for i, c := range tbl.Desc.AllActiveAndInactiveChecks() { 1211 ck := *c 1212 ck.Validity = sqlbase.ConstraintValidity_Unvalidated 1213 tableDesc.Checks[i] = &ck 1214 } 1215 } 1216 1217 // TODO(dt): re-validate any FKs? 1218 // Note that this CPut is safe with respect to mixed-version descriptor 1219 // upgrade and downgrade, because IMPORT does not operate in mixed-version 1220 // states. 
1221 // TODO(jordan,lucy): remove this comment once 19.2 is released. 1222 existingDesc, err := sqlbase.ConditionalGetTableDescFromTxn(ctx, txn, execCfg.Codec, tbl.Desc) 1223 if err != nil { 1224 return errors.Wrap(err, "publishing tables") 1225 } 1226 b.CPut( 1227 sqlbase.MakeDescMetadataKey(execCfg.Codec, tableDesc.ID), 1228 sqlbase.WrapDescriptor(&tableDesc), 1229 existingDesc) 1230 } 1231 if err := txn.Run(ctx, b); err != nil { 1232 return errors.Wrap(err, "publishing tables") 1233 } 1234 1235 // Update job record to mark tables published state as complete. 1236 details.TablesPublished = true 1237 err := r.job.WithTxn(txn).SetDetails(ctx, details) 1238 if err != nil { 1239 return errors.Wrap(err, "updating job details after publishing tables") 1240 } 1241 1242 return nil 1243 }) 1244 1245 if err != nil { 1246 return err 1247 } 1248 1249 // Initiate a run of CREATE STATISTICS. We don't know the actual number of 1250 // rows affected per table, so we use a large number because we want to make 1251 // sure that stats always get created/refreshed here. 1252 for i := range details.Tables { 1253 execCfg.StatsRefresher.NotifyMutation(details.Tables[i].Desc.ID, math.MaxInt32 /* rowsAffected */) 1254 } 1255 1256 return nil 1257 } 1258 1259 // OnFailOrCancel is part of the jobs.Resumer interface. Removes data that has 1260 // been committed from a import that has failed or been canceled. It does this 1261 // by adding the table descriptors in DROP state, which causes the schema change 1262 // stuff to delete the keys in the background. 1263 func (r *importResumer) OnFailOrCancel(ctx context.Context, phs interface{}) error { 1264 telemetry.Count("import.total.failed") 1265 1266 cfg := phs.(sql.PlanHookState).ExecCfg() 1267 return cfg.DB.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error { 1268 if err := r.dropTables(ctx, txn, cfg); err != nil { 1269 return err 1270 } 1271 return r.releaseProtectedTimestamp(ctx, txn, cfg.ProtectedTimestampProvider) 1272 }) 1273 } 1274 1275 func (r *importResumer) releaseProtectedTimestamp( 1276 ctx context.Context, txn *kv.Txn, pts protectedts.Storage, 1277 ) error { 1278 details := r.job.Details().(jobspb.ImportDetails) 1279 ptsID := details.ProtectedTimestampRecord 1280 // If the job doesn't have a protected timestamp then there's nothing to do. 1281 if ptsID == nil { 1282 return nil 1283 } 1284 err := pts.Release(ctx, txn, *ptsID) 1285 if errors.Is(err, protectedts.ErrNotExists) { 1286 // No reason to return an error which might cause problems if it doesn't 1287 // seem to exist. 1288 log.Warningf(ctx, "failed to release protected which seems not to exist: %v", err) 1289 err = nil 1290 } 1291 return err 1292 } 1293 1294 // dropTables implements the OnFailOrCancel logic. 1295 func (r *importResumer) dropTables( 1296 ctx context.Context, txn *kv.Txn, execCfg *sql.ExecutorConfig, 1297 ) error { 1298 details := r.job.Details().(jobspb.ImportDetails) 1299 1300 // Needed to trigger the schema change manager. 1301 if err := txn.SetSystemConfigTrigger(); err != nil { 1302 return err 1303 } 1304 1305 // If the prepare step of the import job was not completed then the 1306 // descriptors do not need to be rolled back as the txn updating them never 1307 // completed. 
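	// When there is something to roll back, the code below reverts existing
	// IMPORT INTO targets to their pre-import contents and re-publishes
	// them, while tables created by this IMPORT are moved to the DROP state
	// and handed off to a schema-change GC job.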
1308 if !details.PrepareComplete { 1309 return nil 1310 } 1311 1312 var revert []*sqlbase.TableDescriptor 1313 for _, tbl := range details.Tables { 1314 if !tbl.IsNew { 1315 revert = append(revert, tbl.Desc) 1316 } 1317 } 1318 1319 // NB: if a revert fails it will abort the rest of this failure txn, which is 1320 // also what brings tables back online. We _could_ change the error handling 1321 // or just move the revert into Resume()'s error return path, however it isn't 1322 // clear that just bringing a table back online with partially imported data 1323 // that may or may not be partially reverted is actually a good idea. It seems 1324 // better to do the revert here so that the table comes back if and only if, 1325 // it was rolled back to its pre-IMPORT state, and instead provide a manual 1326 // admin knob (e.g. ALTER TABLE REVERT TO SYSTEM TIME) if anything goes wrong. 1327 if len(revert) > 0 { 1328 // Sanity check Walltime so it doesn't become a TRUNCATE if there's a bug. 1329 if details.Walltime == 0 { 1330 return errors.Errorf("invalid pre-IMPORT time to rollback") 1331 } 1332 ts := hlc.Timestamp{WallTime: details.Walltime}.Prev() 1333 if err := sql.RevertTables(ctx, txn.DB(), execCfg, revert, ts, sql.RevertTableDefaultBatchSize); err != nil { 1334 return errors.Wrap(err, "rolling back partially completed IMPORT") 1335 } 1336 } 1337 1338 b := txn.NewBatch() 1339 dropTime := int64(1) 1340 tablesToGC := make([]sqlbase.ID, 0, len(details.Tables)) 1341 for _, tbl := range details.Tables { 1342 tableDesc := *tbl.Desc 1343 tableDesc.Version++ 1344 if tbl.IsNew { 1345 tableDesc.State = sqlbase.TableDescriptor_DROP 1346 // If the DropTime if set, a table uses RangeClear for fast data removal. This 1347 // operation starts at DropTime + the GC TTL. If we used now() here, it would 1348 // not clean up data until the TTL from the time of the error. Instead, use 1 1349 // (that is, 1ns past the epoch) to allow this to be cleaned up as soon as 1350 // possible. This is safe since the table data was never visible to users, 1351 // and so we don't need to preserve MVCC semantics. 1352 tableDesc.DropTime = dropTime 1353 if err := sqlbase.RemovePublicTableNamespaceEntry(ctx, txn, execCfg.Codec, tableDesc.ParentID, tableDesc.Name); err != nil { 1354 return err 1355 } 1356 tablesToGC = append(tablesToGC, tableDesc.ID) 1357 } else { 1358 // IMPORT did not create this table, so we should not drop it. 1359 tableDesc.State = sqlbase.TableDescriptor_PUBLIC 1360 } 1361 // Note that this CPut is safe with respect to mixed-version descriptor 1362 // upgrade and downgrade, because IMPORT does not operate in mixed-version 1363 // states. 1364 // TODO(jordan,lucy): remove this comment once 19.2 is released. 1365 existingDesc, err := sqlbase.ConditionalGetTableDescFromTxn(ctx, txn, execCfg.Codec, tbl.Desc) 1366 if err != nil { 1367 return errors.Wrap(err, "rolling back tables") 1368 } 1369 b.CPut( 1370 sqlbase.MakeDescMetadataKey(execCfg.Codec, tableDesc.ID), 1371 sqlbase.WrapDescriptor(&tableDesc), 1372 existingDesc) 1373 } 1374 1375 // Queue a GC job. 
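	// Only tables created by this IMPORT (collected in tablesToGC above) are
	// included; reverted IMPORT INTO targets keep their data and remain
	// PUBLIC.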
1376 gcDetails := jobspb.SchemaChangeGCDetails{} 1377 for _, tableID := range tablesToGC { 1378 gcDetails.Tables = append(gcDetails.Tables, jobspb.SchemaChangeGCDetails_DroppedID{ 1379 ID: tableID, 1380 DropTime: dropTime, 1381 }) 1382 } 1383 gcJobRecord := jobs.Record{ 1384 Description: fmt.Sprintf("GC for %s", r.job.Payload().Description), 1385 Username: r.job.Payload().Username, 1386 DescriptorIDs: tablesToGC, 1387 Details: gcDetails, 1388 Progress: jobspb.SchemaChangeGCProgress{}, 1389 NonCancelable: true, 1390 } 1391 if _, err := execCfg.JobRegistry.CreateJobWithTxn(ctx, gcJobRecord, txn); err != nil { 1392 return err 1393 } 1394 1395 return errors.Wrap(txn.Run(ctx, b), "rolling back tables") 1396 } 1397 1398 var _ jobs.Resumer = &importResumer{} 1399 1400 func init() { 1401 sql.AddPlanHook(importPlanHook) 1402 jobs.RegisterConstructor( 1403 jobspb.TypeImport, 1404 func(job *jobs.Job, settings *cluster.Settings) jobs.Resumer { 1405 return &importResumer{ 1406 job: job, 1407 settings: settings, 1408 } 1409 }, 1410 ) 1411 }
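// exampleValidateCSVOptions is a small illustrative sketch, not used by the
// import code itself: it shows how a per-format allow-list combines with
// allowedCommonOptions during option validation. The option values are
// arbitrary examples.
func exampleValidateCSVOptions() error {
	// "delimiter" is CSV-specific and "decompress" is common to all formats,
	// so this combination passes validation.
	okOpts := map[string]string{csvDelimiter: "|", importOptionDecompress: "gzip"}
	if err := validateFormatOptions("CSV", okOpts, csvAllowedOptions); err != nil {
		return err
	}
	// "rows_terminated_by" belongs to the DELIMITED (MySQL outfile) format,
	// so validating it against the CSV allow-list returns an
	// "invalid option" error.
	badOpts := map[string]string{mysqlOutfileRowSep: "\n"}
	return validateFormatOptions("CSV", badOpts, csvAllowedOptions)
}

// Reference the sketch so unused-code linters do not flag it.
var _ = exampleValidateCSVOptions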