github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ccl/workloadccl/cliccl/fixtures.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Licensed as a CockroachDB Enterprise file under the Cockroach Community
     4  // License (the "License"); you may not use this file except in compliance with
     5  // the License. You may obtain a copy of the License at
     6  //
     7  //     https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt
     8  
     9  package cliccl
    10  
    11  import (
    12  	"context"
    13  	gosql "database/sql"
    14  	"fmt"
    15  	"strings"
    16  
    17  	"cloud.google.com/go/storage"
    18  	"github.com/cockroachdb/cockroach/pkg/ccl/workloadccl"
    19  	"github.com/cockroachdb/cockroach/pkg/util/humanizeutil"
    20  	"github.com/cockroachdb/cockroach/pkg/util/log"
    21  	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
    22  	"github.com/cockroachdb/cockroach/pkg/workload"
    23  	workloadcli "github.com/cockroachdb/cockroach/pkg/workload/cli"
    24  	"github.com/cockroachdb/cockroach/pkg/workload/workloadsql"
    25  	"github.com/cockroachdb/errors"
    26  	"github.com/spf13/cobra"
    27  	"github.com/spf13/pflag"
    28  	"google.golang.org/api/option"
    29  )
    30  
    31  var useast1bFixtures = workloadccl.FixtureConfig{
    32  	// TODO(dan): Keep fixtures in more than one region to better support
    33  	// geo-distributed clusters.
    34  	GCSBucket: `cockroach-fixtures`,
    35  	GCSPrefix: `workload`,
    36  }
    37  
    38  func config() workloadccl.FixtureConfig {
    39  	config := useast1bFixtures
    40  	if len(*gcsBucketOverride) > 0 {
    41  		config.GCSBucket = *gcsBucketOverride
    42  	}
    43  	if len(*gcsPrefixOverride) > 0 {
    44  		config.GCSPrefix = *gcsPrefixOverride
    45  	}
    46  	if len(*gcsBillingProjectOverride) > 0 {
    47  		config.BillingProject = *gcsBillingProjectOverride
    48  	}
    49  	config.CSVServerURL = *fixturesMakeImportCSVServerURL
    50  	return config
    51  }
    52  
    53  var fixturesCmd = workloadcli.SetCmdDefaults(&cobra.Command{
    54  	Use:   `fixtures`,
    55  	Short: `tools for quickly synthesizing and loading large datasets`,
    56  })
    57  var fixturesListCmd = workloadcli.SetCmdDefaults(&cobra.Command{
    58  	Use:   `list`,
    59  	Short: `list all fixtures stored on GCS`,
    60  	Run:   workloadcli.HandleErrs(fixturesList),
    61  })
    62  var fixturesMakeCmd = workloadcli.SetCmdDefaults(&cobra.Command{
    63  	Use:   `make`,
    64  	Short: `IMPORT a fixture and then store a BACKUP of it on GCS`,
    65  })
    66  var fixturesLoadCmd = workloadcli.SetCmdDefaults(&cobra.Command{
    67  	Use:   `load`,
    68  	Short: `load a fixture into a running cluster. An enterprise license is required.`,
    69  })
    70  var fixturesImportCmd = workloadcli.SetCmdDefaults(&cobra.Command{
    71  	Use:   `import`,
    72  	Short: `import a fixture into a running cluster. An enterprise license is NOT required.`,
    73  })
    74  var fixturesURLCmd = workloadcli.SetCmdDefaults(&cobra.Command{
    75  	Use:   `url`,
    76  	Short: `generate the GCS URL for a fixture`,
    77  })
    78  
    79  var fixturesLoadImportShared = pflag.NewFlagSet(`load/import`, pflag.ContinueOnError)
    80  var fixturesMakeImportShared = pflag.NewFlagSet(`load/import`, pflag.ContinueOnError)
    81  
    82  var fixturesMakeImportCSVServerURL = fixturesMakeImportShared.String(
    83  	`csv-server`, ``,
    84  	`Skip saving CSVs to cloud storage, instead get them from a 'csv-server' running at this url`)
    85  
    86  var fixturesMakeOnlyTable = fixturesMakeCmd.PersistentFlags().String(
    87  	`only-tables`, ``,
    88  	`Only load the tables with the given comma-separated names`)
    89  
    90  var fixturesMakeFilesPerNode = fixturesMakeCmd.PersistentFlags().Int(
    91  	`files-per-node`, 1,
    92  	`number of file URLs to generate per node when using csv-server`)
    93  
    94  var fixturesImportFilesPerNode = fixturesImportCmd.PersistentFlags().Int(
    95  	`files-per-node`, 1,
    96  	`number of file URLs to generate per node`)
    97  
    98  var fixturesRunChecks = fixturesLoadImportShared.Bool(
    99  	`checks`, true, `Run validity checks on the loaded fixture`)
   100  
   101  var fixturesImportInjectStats = fixturesImportCmd.PersistentFlags().Bool(
   102  	`inject-stats`, true, `Inject pre-calculated statistics if they are available`)
   103  
   104  var gcsBucketOverride, gcsPrefixOverride, gcsBillingProjectOverride *string
   105  
   106  func init() {
   107  	gcsBucketOverride = fixturesCmd.PersistentFlags().String(`gcs-bucket-override`, ``, ``)
   108  	gcsPrefixOverride = fixturesCmd.PersistentFlags().String(`gcs-prefix-override`, ``, ``)
   109  	_ = fixturesCmd.PersistentFlags().MarkHidden(`gcs-bucket-override`)
   110  	_ = fixturesCmd.PersistentFlags().MarkHidden(`gcs-prefix-override`)
   111  
   112  	gcsBillingProjectOverride = fixturesCmd.PersistentFlags().String(
   113  		`gcs-billing-project`, ``,
   114  		`Google Cloud project to use for storage billing; `+
   115  			`required to be non-empty if the bucket is requestor pays`)
   116  }
   117  
   118  const storageError = `failed to create google cloud client ` +
   119  	`(You may need to setup the GCS application default credentials: ` +
   120  	`'gcloud auth application-default login --project=cockroach-shared')`
   121  
   122  // getStorage returns a GCS client using "application default" credentials. The
   123  // caller is responsible for closing it.
   124  func getStorage(ctx context.Context) (*storage.Client, error) {
   125  	// TODO(dan): Right now, we don't need all the complexity of
   126  	// cloud.ExternalStorage, but if we start supporting more than just GCS,
   127  	// this should probably be switched to it.
   128  	g, err := storage.NewClient(ctx, option.WithScopes(storage.ScopeReadWrite))
   129  	return g, errors.Wrap(err, storageError)
   130  }
   131  
   132  func init() {
   133  	workloadcli.AddSubCmd(func(userFacing bool) *cobra.Command {
   134  		for _, meta := range workload.Registered() {
   135  			gen := meta.New()
   136  			var genFlags *pflag.FlagSet
   137  			if f, ok := gen.(workload.Flagser); ok {
   138  				genFlags = f.Flags().FlagSet
   139  				// Hide runtime-only flags so they don't clutter up the help text,
   140  				// but don't remove them entirely so if someone switches from
   141  				// `./workload run` to `./workload fixtures` they don't have to
   142  				// remove them from the invocation.
   143  				for flagName, meta := range f.Flags().Meta {
   144  					if meta.RuntimeOnly || meta.CheckConsistencyOnly {
   145  						_ = genFlags.MarkHidden(flagName)
   146  					}
   147  				}
   148  			}
   149  
   150  			genMakeCmd := workloadcli.SetCmdDefaults(&cobra.Command{
   151  				Use:  meta.Name + ` [CRDB URI]`,
   152  				Args: cobra.RangeArgs(0, 1),
   153  			})
   154  			genMakeCmd.Flags().AddFlagSet(genFlags)
   155  			genMakeCmd.Flags().AddFlagSet(fixturesMakeImportShared)
   156  			genMakeCmd.Run = workloadcli.CmdHelper(gen, fixturesMake)
   157  			fixturesMakeCmd.AddCommand(genMakeCmd)
   158  
   159  			genLoadCmd := workloadcli.SetCmdDefaults(&cobra.Command{
   160  				Use:  meta.Name + ` [CRDB URI]`,
   161  				Args: cobra.RangeArgs(0, 1),
   162  			})
   163  			genLoadCmd.Flags().AddFlagSet(genFlags)
   164  			genLoadCmd.Flags().AddFlagSet(fixturesLoadImportShared)
   165  			genLoadCmd.Run = workloadcli.CmdHelper(gen, fixturesLoad)
   166  			fixturesLoadCmd.AddCommand(genLoadCmd)
   167  
   168  			genImportCmd := workloadcli.SetCmdDefaults(&cobra.Command{
   169  				Use:  meta.Name + ` [CRDB URI]`,
   170  				Args: cobra.RangeArgs(0, 1),
   171  			})
   172  			genImportCmd.Flags().AddFlagSet(genFlags)
   173  			genImportCmd.Flags().AddFlagSet(fixturesLoadImportShared)
   174  			genImportCmd.Flags().AddFlagSet(fixturesMakeImportShared)
   175  			genImportCmd.Run = workloadcli.CmdHelper(gen, fixturesImport)
   176  			fixturesImportCmd.AddCommand(genImportCmd)
   177  
   178  			genURLCmd := workloadcli.SetCmdDefaults(&cobra.Command{
   179  				Use:  meta.Name,
   180  				Args: cobra.NoArgs,
   181  			})
   182  			genURLCmd.Flags().AddFlagSet(genFlags)
   183  			genURLCmd.Run = fixturesURL(gen)
   184  			fixturesURLCmd.AddCommand(genURLCmd)
   185  		}
   186  		fixturesCmd.AddCommand(fixturesListCmd)
   187  		fixturesCmd.AddCommand(fixturesMakeCmd)
   188  		fixturesCmd.AddCommand(fixturesLoadCmd)
   189  		fixturesCmd.AddCommand(fixturesImportCmd)
   190  		fixturesCmd.AddCommand(fixturesURLCmd)
   191  		return fixturesCmd
   192  	})
   193  }
   194  
   195  func fixturesList(_ *cobra.Command, _ []string) error {
   196  	ctx := context.Background()
   197  	gcs, err := getStorage(ctx)
   198  	if err != nil {
   199  		return err
   200  	}
   201  	defer func() { _ = gcs.Close() }()
   202  	fixtures, err := workloadccl.ListFixtures(ctx, gcs, config())
   203  	if err != nil {
   204  		return err
   205  	}
   206  	for _, fixture := range fixtures {
   207  		fmt.Println(fixture)
   208  	}
   209  	return nil
   210  }
   211  
   212  type filteringGenerator struct {
   213  	gen    workload.Generator
   214  	filter map[string]struct{}
   215  }
   216  
   217  func (f filteringGenerator) Meta() workload.Meta {
   218  	return f.gen.Meta()
   219  }
   220  
   221  func (f filteringGenerator) Tables() []workload.Table {
   222  	ret := make([]workload.Table, 0)
   223  	for _, t := range f.gen.Tables() {
   224  		if _, ok := f.filter[t.Name]; ok {
   225  			ret = append(ret, t)
   226  		}
   227  	}
   228  	return ret
   229  }
   230  
   231  func fixturesMake(gen workload.Generator, urls []string, _ string) error {
   232  	ctx := context.Background()
   233  	gcs, err := getStorage(ctx)
   234  	if err != nil {
   235  		return err
   236  	}
   237  	defer func() { _ = gcs.Close() }()
   238  
   239  	sqlDB, err := gosql.Open(`cockroach`, strings.Join(urls, ` `))
   240  	if err != nil {
   241  		return err
   242  	}
   243  	if *fixturesMakeOnlyTable != "" {
   244  		tableNames := strings.Split(*fixturesMakeOnlyTable, ",")
   245  		if len(tableNames) == 0 {
   246  			return errors.New("no table names specified")
   247  		}
   248  		filter := make(map[string]struct{}, len(tableNames))
   249  		for _, tableName := range tableNames {
   250  			filter[tableName] = struct{}{}
   251  		}
   252  		gen = filteringGenerator{
   253  			gen:    gen,
   254  			filter: filter,
   255  		}
   256  	}
   257  	filesPerNode := *fixturesMakeFilesPerNode
   258  	fixture, err := workloadccl.MakeFixture(ctx, sqlDB, gcs, config(), gen, filesPerNode)
   259  	if err != nil {
   260  		return err
   261  	}
   262  	for _, table := range fixture.Tables {
   263  		log.Infof(ctx, `stored backup %s`, table.BackupURI)
   264  	}
   265  	return nil
   266  }
   267  
   268  // restoreDataLoader is an InitialDataLoader implementation that loads data with
   269  // RESTORE.
   270  type restoreDataLoader struct {
   271  	fixture  workloadccl.Fixture
   272  	database string
   273  }
   274  
   275  // InitialDataLoad implements the InitialDataLoader interface.
   276  func (l restoreDataLoader) InitialDataLoad(
   277  	ctx context.Context, db *gosql.DB, gen workload.Generator,
   278  ) (int64, error) {
   279  	log.Infof(ctx, "starting restore of %d tables", len(gen.Tables()))
   280  	start := timeutil.Now()
   281  	bytes, err := workloadccl.RestoreFixture(ctx, db, l.fixture, l.database, true /* injectStats */)
   282  	if err != nil {
   283  		return 0, errors.Wrap(err, `restoring fixture`)
   284  	}
   285  	elapsed := timeutil.Since(start)
   286  	log.Infof(ctx, "restored %s bytes in %d tables (took %s, %s)",
   287  		humanizeutil.IBytes(bytes), len(gen.Tables()), elapsed, humanizeutil.DataRate(bytes, elapsed))
   288  	return bytes, nil
   289  }
   290  
   291  func fixturesLoad(gen workload.Generator, urls []string, dbName string) error {
   292  	ctx := context.Background()
   293  	gcs, err := getStorage(ctx)
   294  	if err != nil {
   295  		return err
   296  	}
   297  	defer func() { _ = gcs.Close() }()
   298  
   299  	sqlDB, err := gosql.Open(`cockroach`, strings.Join(urls, ` `))
   300  	if err != nil {
   301  		return err
   302  	}
   303  	if _, err := sqlDB.Exec(`CREATE DATABASE IF NOT EXISTS ` + dbName); err != nil {
   304  		return err
   305  	}
   306  
   307  	fixture, err := workloadccl.GetFixture(ctx, gcs, config(), gen)
   308  	if err != nil {
   309  		return errors.Wrap(err, `finding fixture`)
   310  	}
   311  
   312  	l := restoreDataLoader{fixture: fixture, database: dbName}
   313  	if _, err := workloadsql.Setup(ctx, sqlDB, gen, l); err != nil {
   314  		return err
   315  	}
   316  
   317  	if hooks, ok := gen.(workload.Hookser); *fixturesRunChecks && ok {
   318  		if consistencyCheckFn := hooks.Hooks().CheckConsistency; consistencyCheckFn != nil {
   319  			log.Info(ctx, "fixture is imported; now running consistency checks (ctrl-c to abort)")
   320  			if err := consistencyCheckFn(ctx, sqlDB); err != nil {
   321  				return err
   322  			}
   323  		}
   324  	}
   325  
   326  	return nil
   327  }
   328  
   329  func fixturesImport(gen workload.Generator, urls []string, dbName string) error {
   330  	ctx := context.Background()
   331  	sqlDB, err := gosql.Open(`cockroach`, strings.Join(urls, ` `))
   332  	if err != nil {
   333  		return err
   334  	}
   335  	if _, err := sqlDB.Exec(`CREATE DATABASE IF NOT EXISTS ` + dbName); err != nil {
   336  		return err
   337  	}
   338  
   339  	l := workloadccl.ImportDataLoader{
   340  		FilesPerNode: *fixturesImportFilesPerNode,
   341  		InjectStats:  *fixturesImportInjectStats,
   342  		CSVServer:    *fixturesMakeImportCSVServerURL,
   343  	}
   344  	if _, err := workloadsql.Setup(ctx, sqlDB, gen, l); err != nil {
   345  		return err
   346  	}
   347  
   348  	if hooks, ok := gen.(workload.Hookser); *fixturesRunChecks && ok {
   349  		if consistencyCheckFn := hooks.Hooks().CheckConsistency; consistencyCheckFn != nil {
   350  			log.Info(ctx, "fixture is restored; now running consistency checks (ctrl-c to abort)")
   351  			if err := consistencyCheckFn(ctx, sqlDB); err != nil {
   352  				return err
   353  			}
   354  		}
   355  	}
   356  
   357  	return nil
   358  }
   359  
   360  func fixturesURL(gen workload.Generator) func(*cobra.Command, []string) {
   361  	return workloadcli.HandleErrs(func(*cobra.Command, []string) error {
   362  		if h, ok := gen.(workload.Hookser); ok {
   363  			if err := h.Hooks().Validate(); err != nil {
   364  				return err
   365  			}
   366  		}
   367  
   368  		fmt.Println(workloadccl.FixtureURL(config(), gen))
   369  		return nil
   370  	})
   371  }