github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ccl/importccl/load.go (about)

     1  // Copyright 2016 The Cockroach Authors.
     2  //
     3  // Licensed as a CockroachDB Enterprise file under the Cockroach Community
     4  // License (the "License"); you may not use this file except in compliance with
     5  // the License. You may obtain a copy of the License at
     6  //
     7  //     https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt
     8  
     9  package importccl
    10  
    11  import (
    12  	"bufio"
    13  	"bytes"
    14  	"context"
    15  	gosql "database/sql"
    16  	"fmt"
    17  	"io"
    18  	"math/rand"
    19  
    20  	"github.com/cockroachdb/cockroach/pkg/base"
    21  	"github.com/cockroachdb/cockroach/pkg/blobs"
    22  	"github.com/cockroachdb/cockroach/pkg/ccl/backupccl"
    23  	"github.com/cockroachdb/cockroach/pkg/config/zonepb"
    24  	"github.com/cockroachdb/cockroach/pkg/keys"
    25  	"github.com/cockroachdb/cockroach/pkg/kv"
    26  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    27  	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
    28  	"github.com/cockroachdb/cockroach/pkg/sql"
    29  	"github.com/cockroachdb/cockroach/pkg/sql/parser"
    30  	"github.com/cockroachdb/cockroach/pkg/sql/row"
    31  	"github.com/cockroachdb/cockroach/pkg/sql/sem/transform"
    32  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    33  	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    34  	"github.com/cockroachdb/cockroach/pkg/storage"
    35  	"github.com/cockroachdb/cockroach/pkg/storage/cloud"
    36  	"github.com/cockroachdb/cockroach/pkg/util"
    37  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    38  	"github.com/cockroachdb/cockroach/pkg/util/log"
    39  	"github.com/cockroachdb/cockroach/pkg/util/protoutil"
    40  	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
    41  	"github.com/cockroachdb/errors"
    42  )
    43  
// TestingGetDescriptorFromDB is a wrapper for getDescriptorFromDB.
// It exists only to expose the unexported lookup to tests outside this
// package; see getDescriptorFromDB for semantics and error behavior.
func TestingGetDescriptorFromDB(
	ctx context.Context, db *gosql.DB, dbName string,
) (*sqlbase.DatabaseDescriptor, error) {
	return getDescriptorFromDB(ctx, db, dbName)
}
    50  
    51  // getDescriptorFromDB returns the descriptor in bytes of the given table name.
    52  func getDescriptorFromDB(
    53  	ctx context.Context, db *gosql.DB, dbName string,
    54  ) (*sqlbase.DatabaseDescriptor, error) {
    55  	var dbDescBytes []byte
    56  	// Due to the namespace migration, the row may not exist in system.namespace
    57  	// so a fallback to system.namespace_deprecated is required.
    58  	// TODO(sqlexec): In 20.2, this logic can be removed.
    59  	for _, t := range []struct {
    60  		tableName   string
    61  		extraClause string
    62  	}{
    63  		{fmt.Sprintf("[%d AS n]", keys.NamespaceTableID), `AND "parentSchemaID" = 0`},
    64  		{fmt.Sprintf("[%d AS n]", keys.DeprecatedNamespaceTableID), ""},
    65  	} {
    66  		if err := db.QueryRow(
    67  			fmt.Sprintf(`SELECT
    68  			d.descriptor
    69  		FROM %s INNER JOIN system.descriptor d ON n.id = d.id
    70  		WHERE n."parentID" = $1 %s
    71  		AND n.name = $2`,
    72  				t.tableName,
    73  				t.extraClause,
    74  			),
    75  			keys.RootNamespaceID,
    76  			dbName,
    77  		).Scan(&dbDescBytes); err != nil {
    78  			if errors.Is(err, gosql.ErrNoRows) {
    79  				continue
    80  			}
    81  			return nil, errors.Wrap(err, "fetch database descriptor")
    82  		}
    83  		var dbDescWrapper sqlbase.Descriptor
    84  		if err := protoutil.Unmarshal(dbDescBytes, &dbDescWrapper); err != nil {
    85  			return nil, errors.Wrap(err, "unmarshal database descriptor")
    86  		}
    87  		return dbDescWrapper.GetDatabase(), nil
    88  	}
    89  	return nil, gosql.ErrNoRows
    90  }
    91  
// Load converts r into SSTables and backup descriptors. database is the name
// of the database into which the SSTables will eventually be written. uri
// is the storage location. ts is the time at which the MVCC data will
// be set. loadChunkBytes is the size at which to create a new SSTable
// (which will translate into a new range during restore); set to 0 to use
// the zone's default range max / 2.
//
// The reader is expected to contain a SQL dump: a sequence of semicolon-
// terminated statements where each CREATE TABLE is followed by the INSERTs
// for that table, and rows arrive in ascending key order. Any other
// statement kind, or an out-of-order row, is an error.
func Load(
	ctx context.Context,
	db *gosql.DB,
	r io.Reader,
	database, uri string,
	ts hlc.Timestamp,
	loadChunkBytes int64,
	tempPrefix string,
	writeToDir string,
) (backupccl.BackupManifest, error) {
	if loadChunkBytes == 0 {
		loadChunkBytes = *zonepb.DefaultZoneConfig().RangeMaxBytes / 2
	}

	// Build an EvalContext whose txn/stmt timestamps match ts so that any
	// default expressions evaluated during row generation are deterministic.
	var txCtx transform.ExprTransformContext
	curTime := timeutil.Unix(0, ts.WallTime)
	evalCtx := &tree.EvalContext{}
	evalCtx.SetTxnTimestamp(curTime)
	evalCtx.SetStmtTimestamp(curTime)
	evalCtx.Codec = keys.TODOSQLCodec

	// Resolve the destination external storage for the generated SSTs and
	// the backup manifest.
	blobClientFactory := blobs.TestBlobServiceClient(writeToDir)
	conf, err := cloud.ExternalStorageConfFromURI(uri)
	if err != nil {
		return backupccl.BackupManifest{}, err
	}
	dir, err := cloud.MakeExternalStorage(ctx, conf, base.ExternalIODirConfig{},
		cluster.NoSettings, blobClientFactory)
	if err != nil {
		return backupccl.BackupManifest{}, errors.Wrap(err, "export storage from URI")
	}
	defer dir.Close()

	// The database must already exist in the cluster; its descriptor seeds
	// the backup and supplies privileges for the created tables.
	dbDesc, err := getDescriptorFromDB(ctx, db, database)
	if err != nil {
		return backupccl.BackupManifest{}, err
	}

	privs := dbDesc.GetPrivileges()

	tableDescs := make(map[string]*sqlbase.ImmutableTableDescriptor)

	// Per-table state, reset (or replaced) each time a CREATE TABLE is seen:
	// the current inserter, default expressions, buffered kvs and their size.
	var currentCmd bytes.Buffer
	scanner := bufio.NewReader(r)
	var ri row.Inserter
	var defaultExprs []tree.TypedExpr
	var cols []sqlbase.ColumnDescriptor
	var tableDesc *sqlbase.ImmutableTableDescriptor
	var tableName string
	var prevKey roachpb.Key
	var kvs []storage.MVCCKeyValue
	var kvBytes int64
	backup := backupccl.BackupManifest{
		Descriptors: []sqlbase.Descriptor{
			{Union: &sqlbase.Descriptor_Database{Database: dbDesc}},
		},
	}
	for {
		// Accumulate lines until a complete (semicolon-terminated) statement
		// has been read; statements may span multiple lines.
		line, err := scanner.ReadString('\n')
		if err == io.EOF {
			break
		}
		if err != nil {
			return backupccl.BackupManifest{}, errors.Wrap(err, "read line")
		}
		currentCmd.WriteString(line)
		if !parser.EndsInSemicolon(currentCmd.String()) {
			currentCmd.WriteByte('\n')
			continue
		}
		cmd := currentCmd.String()
		currentCmd.Reset()
		stmt, err := parser.ParseOne(cmd)
		if err != nil {
			return backupccl.BackupManifest{}, errors.Wrapf(err, "parsing: %q", cmd)
		}
		switch s := stmt.AST.(type) {
		case *tree.CreateTable:
			// A new table starts: flush any kvs buffered for the previous
			// table into their own SST before switching state over.
			if tableDesc != nil {
				if err := writeSST(ctx, &backup, dir, tempPrefix, kvs, ts); err != nil {
					return backupccl.BackupManifest{}, errors.Wrap(err, "writeSST")
				}
				kvs = kvs[:0]
				kvBytes = 0
			}

			// TODO(mjibson): error for now on FKs and CHECK constraints
			// TODO(mjibson): differentiate between qualified (with database) and unqualified (without database) table names

			tableName = s.Table.String()
			tableDesc = tableDescs[tableName]
			if tableDesc != nil {
				return backupccl.BackupManifest{}, errors.Errorf("duplicate CREATE TABLE for %s", tableName)
			}

			// Using test cluster settings means that we'll generate a backup using
			// the latest cluster version available in this binary. This will be safe
			// once we verify the cluster version during restore.
			//
			// TODO(benesch): ensure backups from too-old or too-new nodes are
			// rejected during restore.
			st := cluster.MakeTestingClusterSettings()

			affected := make(map[sqlbase.ID]*sqlbase.MutableTableDescriptor)
			// A nil txn is safe because it is only used by sql.MakeTableDesc, which
			// only uses txn for resolving FKs and interleaved tables, neither of which
			// are present here. Ditto for the schema accessor.
			var txn *kv.Txn
			// At this point the CREATE statements in the loaded SQL do not
			// use the SERIAL type so we need not process SERIAL types here.
			desc, err := sql.MakeTableDesc(ctx, txn, nil /* vt */, st, s, dbDesc.ID, keys.PublicSchemaID,
				0 /* table ID */, ts, privs, affected, nil, evalCtx, evalCtx.SessionData, false /* temporary */)
			if err != nil {
				return backupccl.BackupManifest{}, errors.Wrap(err, "make table desc")
			}

			tableDesc = sqlbase.NewImmutableTableDescriptor(*desc.TableDesc())
			tableDescs[tableName] = tableDesc
			backup.Descriptors = append(backup.Descriptors, sqlbase.Descriptor{
				Union: &sqlbase.Descriptor_Table{Table: desc.TableDesc()},
			})

			// Computed columns would require evaluating arbitrary expressions
			// per row, which this loader does not support.
			for i := range tableDesc.Columns {
				col := &tableDesc.Columns[i]
				if col.IsComputed() {
					return backupccl.BackupManifest{}, errors.Errorf("computed columns are not allowed")
				}
			}

			ri, err = row.MakeInserter(
				ctx, nil, evalCtx.Codec, tableDesc, tableDesc.Columns, row.SkipFKs, nil /* fkTables */, &sqlbase.DatumAlloc{},
			)
			if err != nil {
				return backupccl.BackupManifest{}, errors.Wrap(err, "make row inserter")
			}
			cols, defaultExprs, err =
				sqlbase.ProcessDefaultColumns(ctx, tableDesc.Columns, tableDesc, &txCtx, evalCtx)
			if err != nil {
				return backupccl.BackupManifest{}, errors.Wrap(err, "process default columns")
			}

		case *tree.Insert:
			// INSERTs must target the table created by the most recent
			// CREATE TABLE statement.
			name := tree.AsString(s.Table)
			if tableDesc == nil {
				return backupccl.BackupManifest{}, errors.Errorf("expected previous CREATE TABLE %s statement", name)
			}
			if name != tableName {
				return backupccl.BackupManifest{}, errors.Errorf("unexpected INSERT for table %s after CREATE TABLE %s", name, tableName)
			}
			// Rows must arrive in strictly ascending key order so each SST's
			// span is contiguous; the callback flags (but does not abort on)
			// the first violation, and we error out after the statement.
			outOfOrder := false
			err := insertStmtToKVs(ctx, tableDesc, defaultExprs, cols, evalCtx, ri, s, func(kv roachpb.KeyValue) {
				if outOfOrder || prevKey.Compare(kv.Key) >= 0 {
					outOfOrder = true
					return
				}
				prevKey = kv.Key
				kvBytes += int64(len(kv.Key) + len(kv.Value.RawBytes))
				kvs = append(kvs, storage.MVCCKeyValue{
					Key:   storage.MVCCKey{Key: kv.Key, Timestamp: kv.Value.Timestamp},
					Value: kv.Value.RawBytes,
				})
			})
			if err != nil {
				return backupccl.BackupManifest{}, errors.Wrapf(err, "insertStmtToKVs")
			}
			if outOfOrder {
				return backupccl.BackupManifest{}, errors.Errorf("out of order row: %s", cmd)
			}

			// Flush a chunk once the buffered data exceeds the target SST
			// size; each flush becomes a separate SST (and restore range).
			if kvBytes > loadChunkBytes {
				if err := writeSST(ctx, &backup, dir, tempPrefix, kvs, ts); err != nil {
					return backupccl.BackupManifest{}, errors.Wrap(err, "writeSST")
				}
				kvs = kvs[:0]
				kvBytes = 0
			}

		default:
			return backupccl.BackupManifest{}, errors.Errorf("unsupported load statement: %q", stmt)
		}
	}

	// Flush whatever remains buffered for the final table.
	if tableDesc != nil {
		if err := writeSST(ctx, &backup, dir, tempPrefix, kvs, ts); err != nil {
			return backupccl.BackupManifest{}, errors.Wrap(err, "writeSST")
		}
	}

	// Persist the manifest describing all written SSTs and descriptors.
	descBuf, err := protoutil.Marshal(&backup)
	if err != nil {
		return backupccl.BackupManifest{}, errors.Wrap(err, "marshal backup descriptor")
	}
	if err := dir.WriteFile(ctx, backupccl.BackupManifestName, bytes.NewReader(descBuf)); err != nil {
		return backupccl.BackupManifest{}, errors.Wrap(err, "uploading backup descriptor")
	}

	return backup, nil
}
   296  
   297  func insertStmtToKVs(
   298  	ctx context.Context,
   299  	tableDesc *sqlbase.ImmutableTableDescriptor,
   300  	defaultExprs []tree.TypedExpr,
   301  	cols []sqlbase.ColumnDescriptor,
   302  	evalCtx *tree.EvalContext,
   303  	ri row.Inserter,
   304  	stmt *tree.Insert,
   305  	f func(roachpb.KeyValue),
   306  ) error {
   307  	if stmt.OnConflict != nil {
   308  		return errors.Errorf("load insert: ON CONFLICT not supported: %q", stmt)
   309  	}
   310  	if tree.HasReturningClause(stmt.Returning) {
   311  		return errors.Errorf("load insert: RETURNING not supported: %q", stmt)
   312  	}
   313  	if len(stmt.Columns) > 0 {
   314  		if len(stmt.Columns) != len(cols) {
   315  			return errors.Errorf("load insert: wrong number of columns: %q", stmt)
   316  		}
   317  		for i := range tableDesc.Columns {
   318  			if stmt.Columns[i].String() != tableDesc.Columns[i].Name {
   319  				return errors.Errorf("load insert: unexpected column order: %q", stmt)
   320  			}
   321  		}
   322  	}
   323  	if stmt.Rows.Limit != nil {
   324  		return errors.Errorf("load insert: LIMIT not supported: %q", stmt)
   325  	}
   326  	if stmt.Rows.OrderBy != nil {
   327  		return errors.Errorf("load insert: ORDER BY not supported: %q", stmt)
   328  	}
   329  	values, ok := stmt.Rows.Select.(*tree.ValuesClause)
   330  	if !ok {
   331  		return errors.Errorf("load insert: expected VALUES clause: %q", stmt)
   332  	}
   333  
   334  	b := row.KVInserter(f)
   335  	computedIVarContainer := sqlbase.RowIndexedVarContainer{
   336  		Mapping: ri.InsertColIDtoRowIndex,
   337  		Cols:    tableDesc.Columns,
   338  	}
   339  	for _, tuple := range values.Rows {
   340  		insertRow := make([]tree.Datum, len(tuple))
   341  		for i, expr := range tuple {
   342  			if expr == tree.DNull {
   343  				insertRow[i] = tree.DNull
   344  				continue
   345  			}
   346  			c, ok := expr.(tree.Constant)
   347  			if !ok {
   348  				return errors.Errorf("unsupported expr: %q", expr)
   349  			}
   350  			var err error
   351  			insertRow[i], err = c.ResolveAsType(nil, tableDesc.Columns[i].Type)
   352  			if err != nil {
   353  				return err
   354  			}
   355  		}
   356  
   357  		// We have disallowed computed exprs.
   358  		var computeExprs []tree.TypedExpr
   359  		var computedCols []sqlbase.ColumnDescriptor
   360  
   361  		insertRow, err := row.GenerateInsertRow(
   362  			defaultExprs, computeExprs, cols, computedCols, evalCtx, tableDesc, insertRow, &computedIVarContainer,
   363  		)
   364  		if err != nil {
   365  			return errors.Wrapf(err, "process insert %q", insertRow)
   366  		}
   367  		// TODO(bram): Is the checking of FKs here required? If not, turning them
   368  		// off may provide a speed boost.
   369  		// TODO(mgartner): Add partial index IDs to ignoreIndexes that we should
   370  		// not add entries to.
   371  		var ignoreIndexes util.FastIntSet
   372  		if err := ri.InsertRow(ctx, b, insertRow, ignoreIndexes, true, row.CheckFKs, false /* traceKV */); err != nil {
   373  			return errors.Wrapf(err, "insert %q", insertRow)
   374  		}
   375  	}
   376  	return nil
   377  }
   378  
   379  func writeSST(
   380  	ctx context.Context,
   381  	backup *backupccl.BackupManifest,
   382  	base cloud.ExternalStorage,
   383  	tempPrefix string,
   384  	kvs []storage.MVCCKeyValue,
   385  	ts hlc.Timestamp,
   386  ) error {
   387  	if len(kvs) == 0 {
   388  		return nil
   389  	}
   390  
   391  	filename := fmt.Sprintf("load-%d.sst", rand.Int63())
   392  	log.Infof(ctx, "writesst %s", filename)
   393  
   394  	sstFile := &storage.MemFile{}
   395  	sst := storage.MakeBackupSSTWriter(sstFile)
   396  	defer sst.Close()
   397  	for _, kv := range kvs {
   398  		kv.Key.Timestamp = ts
   399  		if err := sst.Put(kv.Key, kv.Value); err != nil {
   400  			return err
   401  		}
   402  	}
   403  	err := sst.Finish()
   404  	if err != nil {
   405  		return err
   406  	}
   407  
   408  	// TODO(itsbilal): Pass a file handle into SSTWriter instead of writing to a
   409  	// MemFile first.
   410  	if err := base.WriteFile(ctx, filename, bytes.NewReader(sstFile.Data())); err != nil {
   411  		return err
   412  	}
   413  
   414  	backup.Files = append(backup.Files, backupccl.BackupManifest_File{
   415  		Span: roachpb.Span{
   416  			Key: kvs[0].Key.Key,
   417  			// The EndKey is exclusive, so use PrefixEnd to get the first key
   418  			// greater than the last key in the sst.
   419  			EndKey: kvs[len(kvs)-1].Key.Key.PrefixEnd(),
   420  		},
   421  		Path: filename,
   422  	})
   423  	backup.EntryCounts.DataSize += sst.DataSize
   424  	return nil
   425  }