github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ccl/importccl/load.go

// Copyright 2016 The Cockroach Authors.
//
// Licensed as a CockroachDB Enterprise file under the Cockroach Community
// License (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
//     https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt

package importccl

import (
	"bufio"
	"bytes"
	"context"
	gosql "database/sql"
	"fmt"
	"io"
	"math/rand"

	"github.com/cockroachdb/cockroach/pkg/base"
	"github.com/cockroachdb/cockroach/pkg/blobs"
	"github.com/cockroachdb/cockroach/pkg/ccl/backupccl"
	"github.com/cockroachdb/cockroach/pkg/config/zonepb"
	"github.com/cockroachdb/cockroach/pkg/keys"
	"github.com/cockroachdb/cockroach/pkg/kv"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
	"github.com/cockroachdb/cockroach/pkg/sql"
	"github.com/cockroachdb/cockroach/pkg/sql/parser"
	"github.com/cockroachdb/cockroach/pkg/sql/row"
	"github.com/cockroachdb/cockroach/pkg/sql/sem/transform"
	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
	"github.com/cockroachdb/cockroach/pkg/storage"
	"github.com/cockroachdb/cockroach/pkg/storage/cloud"
	"github.com/cockroachdb/cockroach/pkg/util"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/protoutil"
	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
	"github.com/cockroachdb/errors"
)

// TestingGetDescriptorFromDB is a wrapper for getDescriptorFromDB.
func TestingGetDescriptorFromDB(
	ctx context.Context, db *gosql.DB, dbName string,
) (*sqlbase.DatabaseDescriptor, error) {
	return getDescriptorFromDB(ctx, db, dbName)
}

// getDescriptorFromDB returns the database descriptor for the database with
// the given name.
func getDescriptorFromDB(
	ctx context.Context, db *gosql.DB, dbName string,
) (*sqlbase.DatabaseDescriptor, error) {
	var dbDescBytes []byte
	// Due to the namespace migration, the row may not exist in
	// system.namespace, so a fallback to system.namespace_deprecated is
	// required.
	// TODO(sqlexec): In 20.2, this logic can be removed.
	for _, t := range []struct {
		tableName   string
		extraClause string
	}{
		{fmt.Sprintf("[%d AS n]", keys.NamespaceTableID), `AND "parentSchemaID" = 0`},
		{fmt.Sprintf("[%d AS n]", keys.DeprecatedNamespaceTableID), ""},
	} {
		if err := db.QueryRow(
			fmt.Sprintf(`SELECT
				d.descriptor
			FROM %s INNER JOIN system.descriptor d ON n.id = d.id
			WHERE n."parentID" = $1 %s
			AND n.name = $2`,
				t.tableName,
				t.extraClause,
			),
			keys.RootNamespaceID,
			dbName,
		).Scan(&dbDescBytes); err != nil {
			if errors.Is(err, gosql.ErrNoRows) {
				continue
			}
			return nil, errors.Wrap(err, "fetch database descriptor")
		}
		var dbDescWrapper sqlbase.Descriptor
		if err := protoutil.Unmarshal(dbDescBytes, &dbDescWrapper); err != nil {
			return nil, errors.Wrap(err, "unmarshal database descriptor")
		}
		return dbDescWrapper.GetDatabase(), nil
	}
	return nil, gosql.ErrNoRows
}

// Load converts r into SSTables and backup descriptors. database is the name
// of the database into which the SSTables will eventually be written. uri
// is the storage location.
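//
// The input r is expected to be a SQL dump: CREATE TABLE statements, each
// followed by the INSERT statements for that table, for example:
//
//	CREATE TABLE t (a INT PRIMARY KEY, b STRING);
//	INSERT INTO t VALUES (1, 'one'), (2, 'two');
//
// Any other statement kind is rejected, as are INSERTs that target a table
// other than the most recent CREATE TABLE and rows whose generated KVs do
// not arrive in ascending key order (see the *tree.Insert case below).
//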
// ts is the time at which the MVCC data will be set. loadChunkBytes is the
// size at which to create a new SSTable (which will translate into a new
// range during restore); set to 0 to use the zone's default range max / 2.
func Load(
	ctx context.Context,
	db *gosql.DB,
	r io.Reader,
	database, uri string,
	ts hlc.Timestamp,
	loadChunkBytes int64,
	tempPrefix string,
	writeToDir string,
) (backupccl.BackupManifest, error) {
	if loadChunkBytes == 0 {
		loadChunkBytes = *zonepb.DefaultZoneConfig().RangeMaxBytes / 2
	}

	var txCtx transform.ExprTransformContext
	curTime := timeutil.Unix(0, ts.WallTime)
	evalCtx := &tree.EvalContext{}
	evalCtx.SetTxnTimestamp(curTime)
	evalCtx.SetStmtTimestamp(curTime)
	evalCtx.Codec = keys.TODOSQLCodec

	blobClientFactory := blobs.TestBlobServiceClient(writeToDir)
	conf, err := cloud.ExternalStorageConfFromURI(uri)
	if err != nil {
		return backupccl.BackupManifest{}, err
	}
	dir, err := cloud.MakeExternalStorage(ctx, conf, base.ExternalIODirConfig{},
		cluster.NoSettings, blobClientFactory)
	if err != nil {
		return backupccl.BackupManifest{}, errors.Wrap(err, "export storage from URI")
	}
	defer dir.Close()

	dbDesc, err := getDescriptorFromDB(ctx, db, database)
	if err != nil {
		return backupccl.BackupManifest{}, err
	}

	privs := dbDesc.GetPrivileges()

	tableDescs := make(map[string]*sqlbase.ImmutableTableDescriptor)

	var currentCmd bytes.Buffer
	scanner := bufio.NewReader(r)
	var ri row.Inserter
	var defaultExprs []tree.TypedExpr
	var cols []sqlbase.ColumnDescriptor
	var tableDesc *sqlbase.ImmutableTableDescriptor
	var tableName string
	var prevKey roachpb.Key
	var kvs []storage.MVCCKeyValue
	var kvBytes int64
	backup := backupccl.BackupManifest{
		Descriptors: []sqlbase.Descriptor{
			{Union: &sqlbase.Descriptor_Database{Database: dbDesc}},
		},
	}
	for {
		line, err := scanner.ReadString('\n')
		if err == io.EOF {
			break
		}
		if err != nil {
			return backupccl.BackupManifest{}, errors.Wrap(err, "read line")
		}
		currentCmd.WriteString(line)
		if !parser.EndsInSemicolon(currentCmd.String()) {
			currentCmd.WriteByte('\n')
			continue
		}
		cmd := currentCmd.String()
		currentCmd.Reset()
		stmt, err := parser.ParseOne(cmd)
		if err != nil {
			return backupccl.BackupManifest{}, errors.Wrapf(err, "parsing: %q", cmd)
		}
		switch s := stmt.AST.(type) {
		case *tree.CreateTable:
			if tableDesc != nil {
				if err := writeSST(ctx, &backup, dir, tempPrefix, kvs, ts); err != nil {
					return backupccl.BackupManifest{}, errors.Wrap(err, "writeSST")
				}
				kvs = kvs[:0]
				kvBytes = 0
			}

			// TODO(mjibson): error for now on FKs and CHECK constraints
			// TODO(mjibson): differentiate between qualified (with database)
			// and unqualified (without database) table names

			tableName = s.Table.String()
			tableDesc = tableDescs[tableName]
			if tableDesc != nil {
				return backupccl.BackupManifest{}, errors.Errorf("duplicate CREATE TABLE for %s", tableName)
			}

			// Using test cluster settings means that we'll generate a backup using
			// the latest cluster version available in this binary. This will be safe
			// once we verify the cluster version during restore.
			//
			// TODO(benesch): ensure backups from too-old or too-new nodes are
			// rejected during restore.
			st := cluster.MakeTestingClusterSettings()

			affected := make(map[sqlbase.ID]*sqlbase.MutableTableDescriptor)
			// A nil txn is safe because it is only used by sql.MakeTableDesc, which
			// only uses txn for resolving FKs and interleaved tables, neither of which
			// are present here. Ditto for the schema accessor.
			var txn *kv.Txn
			// At this point the CREATE statements in the loaded SQL do not
			// use the SERIAL type so we need not process SERIAL types here.
			desc, err := sql.MakeTableDesc(ctx, txn, nil /* vt */, st, s, dbDesc.ID,
				keys.PublicSchemaID, 0 /* table ID */, ts, privs, affected, nil, evalCtx,
				evalCtx.SessionData, false /* temporary */)
			if err != nil {
				return backupccl.BackupManifest{}, errors.Wrap(err, "make table desc")
			}

			tableDesc = sqlbase.NewImmutableTableDescriptor(*desc.TableDesc())
			tableDescs[tableName] = tableDesc
			backup.Descriptors = append(backup.Descriptors, sqlbase.Descriptor{
				Union: &sqlbase.Descriptor_Table{Table: desc.TableDesc()},
			})

			for i := range tableDesc.Columns {
				col := &tableDesc.Columns[i]
				if col.IsComputed() {
					return backupccl.BackupManifest{}, errors.Errorf("computed columns are not allowed")
				}
			}

			ri, err = row.MakeInserter(
				ctx, nil, evalCtx.Codec, tableDesc, tableDesc.Columns, row.SkipFKs,
				nil /* fkTables */, &sqlbase.DatumAlloc{},
			)
			if err != nil {
				return backupccl.BackupManifest{}, errors.Wrap(err, "make row inserter")
			}
			cols, defaultExprs, err =
				sqlbase.ProcessDefaultColumns(ctx, tableDesc.Columns, tableDesc, &txCtx, evalCtx)
			if err != nil {
				return backupccl.BackupManifest{}, errors.Wrap(err, "process default columns")
			}

		case *tree.Insert:
			name := tree.AsString(s.Table)
			if tableDesc == nil {
				return backupccl.BackupManifest{}, errors.Errorf("expected previous CREATE TABLE %s statement", name)
			}
			if name != tableName {
				return backupccl.BackupManifest{}, errors.Errorf("unexpected INSERT for table %s after CREATE TABLE %s", name, tableName)
			}
			outOfOrder := false
			err := insertStmtToKVs(ctx, tableDesc, defaultExprs, cols, evalCtx, ri, s, func(kv roachpb.KeyValue) {
				if outOfOrder || prevKey.Compare(kv.Key) >= 0 {
					outOfOrder = true
					return
				}
				prevKey = kv.Key
				kvBytes += int64(len(kv.Key) + len(kv.Value.RawBytes))
				kvs = append(kvs, storage.MVCCKeyValue{
					Key:   storage.MVCCKey{Key: kv.Key, Timestamp: kv.Value.Timestamp},
					Value: kv.Value.RawBytes,
				})
			})
			if err != nil {
				return backupccl.BackupManifest{}, errors.Wrap(err, "insertStmtToKVs")
			}
			if outOfOrder {
				return backupccl.BackupManifest{}, errors.Errorf("out of order row: %s", cmd)
			}

			if kvBytes > loadChunkBytes {
				if err := writeSST(ctx, &backup, dir, tempPrefix, kvs, ts); err != nil {
					return backupccl.BackupManifest{}, errors.Wrap(err, "writeSST")
				}
				kvs = kvs[:0]
				kvBytes = 0
			}

		default:
			return backupccl.BackupManifest{}, errors.Errorf("unsupported load statement: %q", stmt)
		}
	}

	if tableDesc != nil {
		if err := writeSST(ctx, &backup, dir, tempPrefix, kvs, ts); err != nil {
			return backupccl.BackupManifest{}, errors.Wrap(err, "writeSST")
		}
	}

	descBuf, err := protoutil.Marshal(&backup)
	if err != nil {
		return backupccl.BackupManifest{}, errors.Wrap(err, "marshal backup descriptor")
	}
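	// Every file referenced by backup.Files has already been uploaded by
	// writeSST at this point, so the manifest is written last.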
	if err := dir.WriteFile(ctx, backupccl.BackupManifestName, bytes.NewReader(descBuf)); err != nil {
		return backupccl.BackupManifest{}, errors.Wrap(err, "uploading backup descriptor")
	}

	return backup, nil
}

func insertStmtToKVs(
	ctx context.Context,
	tableDesc *sqlbase.ImmutableTableDescriptor,
	defaultExprs []tree.TypedExpr,
	cols []sqlbase.ColumnDescriptor,
	evalCtx *tree.EvalContext,
	ri row.Inserter,
	stmt *tree.Insert,
	f func(roachpb.KeyValue),
) error {
	if stmt.OnConflict != nil {
		return errors.Errorf("load insert: ON CONFLICT not supported: %q", stmt)
	}
	if tree.HasReturningClause(stmt.Returning) {
		return errors.Errorf("load insert: RETURNING not supported: %q", stmt)
	}
	if len(stmt.Columns) > 0 {
		if len(stmt.Columns) != len(cols) {
			return errors.Errorf("load insert: wrong number of columns: %q", stmt)
		}
		for i := range tableDesc.Columns {
			if stmt.Columns[i].String() != tableDesc.Columns[i].Name {
				return errors.Errorf("load insert: unexpected column order: %q", stmt)
			}
		}
	}
	if stmt.Rows.Limit != nil {
		return errors.Errorf("load insert: LIMIT not supported: %q", stmt)
	}
	if stmt.Rows.OrderBy != nil {
		return errors.Errorf("load insert: ORDER BY not supported: %q", stmt)
	}
	values, ok := stmt.Rows.Select.(*tree.ValuesClause)
	if !ok {
		return errors.Errorf("load insert: expected VALUES clause: %q", stmt)
	}

	b := row.KVInserter(f)
	computedIVarContainer := sqlbase.RowIndexedVarContainer{
		Mapping: ri.InsertColIDtoRowIndex,
		Cols:    tableDesc.Columns,
	}
	for _, tuple := range values.Rows {
		insertRow := make([]tree.Datum, len(tuple))
		for i, expr := range tuple {
			if expr == tree.DNull {
				insertRow[i] = tree.DNull
				continue
			}
			c, ok := expr.(tree.Constant)
			if !ok {
				return errors.Errorf("unsupported expr: %q", expr)
			}
			var err error
			insertRow[i], err = c.ResolveAsType(nil, tableDesc.Columns[i].Type)
			if err != nil {
				return err
			}
		}

		// We have disallowed computed exprs.
		var computeExprs []tree.TypedExpr
		var computedCols []sqlbase.ColumnDescriptor

		insertRow, err := row.GenerateInsertRow(
			defaultExprs, computeExprs, cols, computedCols, evalCtx, tableDesc,
			insertRow, &computedIVarContainer,
		)
		if err != nil {
			return errors.Wrapf(err, "process insert %q", insertRow)
		}
		// TODO(bram): Is the checking of FKs here required? If not, turning them
		// off may provide a speed boost.
		// TODO(mgartner): Add partial index IDs to ignoreIndexes that we should
		// not add entries to.
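		// ignoreIndexes is left empty (the zero value of util.FastIntSet),
		// so entries are generated for every index of the table.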
		var ignoreIndexes util.FastIntSet
		if err := ri.InsertRow(ctx, b, insertRow, ignoreIndexes, true, row.CheckFKs, false /* traceKV */); err != nil {
			return errors.Wrapf(err, "insert %q", insertRow)
		}
	}
	return nil
}

func writeSST(
	ctx context.Context,
	backup *backupccl.BackupManifest,
	base cloud.ExternalStorage,
	tempPrefix string,
	kvs []storage.MVCCKeyValue,
	ts hlc.Timestamp,
) error {
	if len(kvs) == 0 {
		return nil
	}

	filename := fmt.Sprintf("load-%d.sst", rand.Int63())
	log.Infof(ctx, "writesst %s", filename)

	sstFile := &storage.MemFile{}
	sst := storage.MakeBackupSSTWriter(sstFile)
	defer sst.Close()
	for _, kv := range kvs {
		kv.Key.Timestamp = ts
		if err := sst.Put(kv.Key, kv.Value); err != nil {
			return err
		}
	}
	err := sst.Finish()
	if err != nil {
		return err
	}

	// TODO(itsbilal): Pass a file handle into SSTWriter instead of writing to a
	// MemFile first.
	if err := base.WriteFile(ctx, filename, bytes.NewReader(sstFile.Data())); err != nil {
		return err
	}

	backup.Files = append(backup.Files, backupccl.BackupManifest_File{
		Span: roachpb.Span{
			Key: kvs[0].Key.Key,
			// The EndKey is exclusive, so use PrefixEnd to get the first key
			// greater than the last key in the sst.
			EndKey: kvs[len(kvs)-1].Key.Key.PrefixEnd(),
		},
		Path: filename,
	})
	backup.EntryCounts.DataSize += sst.DataSize
	return nil
}
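// A minimal sketch of how Load might be driven (illustrative only; the
// connection string, file names, and nodelocal URI below are hypothetical
// and not taken from this package):
//
//	ctx := context.Background()
//	db, err := gosql.Open("postgres",
//		"postgresql://root@localhost:26257?sslmode=disable")
//	if err != nil { /* handle error */ }
//	dump, err := os.Open("dump.sql")
//	if err != nil { /* handle error */ }
//	defer dump.Close()
//	manifest, err := Load(ctx, db, dump, "defaultdb", "nodelocal://0/backup",
//		hlc.Timestamp{WallTime: timeutil.Now().UnixNano()},
//		0 /* loadChunkBytes: 0 means default range max / 2 */,
//		os.TempDir() /* tempPrefix */, "/tmp/backup" /* writeToDir */)
//	if err != nil { /* handle error */ }
//	_ = manifest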