github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ccl/workloadccl/cliccl/fixtures.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Licensed as a CockroachDB Enterprise file under the Cockroach Community 4 // License (the "License"); you may not use this file except in compliance with 5 // the License. You may obtain a copy of the License at 6 // 7 // https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt 8 9 package cliccl 10 11 import ( 12 "context" 13 gosql "database/sql" 14 "fmt" 15 "strings" 16 17 "cloud.google.com/go/storage" 18 "github.com/cockroachdb/cockroach/pkg/ccl/workloadccl" 19 "github.com/cockroachdb/cockroach/pkg/util/humanizeutil" 20 "github.com/cockroachdb/cockroach/pkg/util/log" 21 "github.com/cockroachdb/cockroach/pkg/util/timeutil" 22 "github.com/cockroachdb/cockroach/pkg/workload" 23 workloadcli "github.com/cockroachdb/cockroach/pkg/workload/cli" 24 "github.com/cockroachdb/cockroach/pkg/workload/workloadsql" 25 "github.com/cockroachdb/errors" 26 "github.com/spf13/cobra" 27 "github.com/spf13/pflag" 28 "google.golang.org/api/option" 29 ) 30 31 var useast1bFixtures = workloadccl.FixtureConfig{ 32 // TODO(dan): Keep fixtures in more than one region to better support 33 // geo-distributed clusters. 34 GCSBucket: `cockroach-fixtures`, 35 GCSPrefix: `workload`, 36 } 37 38 func config() workloadccl.FixtureConfig { 39 config := useast1bFixtures 40 if len(*gcsBucketOverride) > 0 { 41 config.GCSBucket = *gcsBucketOverride 42 } 43 if len(*gcsPrefixOverride) > 0 { 44 config.GCSPrefix = *gcsPrefixOverride 45 } 46 if len(*gcsBillingProjectOverride) > 0 { 47 config.BillingProject = *gcsBillingProjectOverride 48 } 49 config.CSVServerURL = *fixturesMakeImportCSVServerURL 50 return config 51 } 52 53 var fixturesCmd = workloadcli.SetCmdDefaults(&cobra.Command{ 54 Use: `fixtures`, 55 Short: `tools for quickly synthesizing and loading large datasets`, 56 }) 57 var fixturesListCmd = workloadcli.SetCmdDefaults(&cobra.Command{ 58 Use: `list`, 59 Short: `list all fixtures stored on GCS`, 60 Run: workloadcli.HandleErrs(fixturesList), 61 }) 62 var fixturesMakeCmd = workloadcli.SetCmdDefaults(&cobra.Command{ 63 Use: `make`, 64 Short: `IMPORT a fixture and then store a BACKUP of it on GCS`, 65 }) 66 var fixturesLoadCmd = workloadcli.SetCmdDefaults(&cobra.Command{ 67 Use: `load`, 68 Short: `load a fixture into a running cluster. An enterprise license is required.`, 69 }) 70 var fixturesImportCmd = workloadcli.SetCmdDefaults(&cobra.Command{ 71 Use: `import`, 72 Short: `import a fixture into a running cluster. An enterprise license is NOT required.`, 73 }) 74 var fixturesURLCmd = workloadcli.SetCmdDefaults(&cobra.Command{ 75 Use: `url`, 76 Short: `generate the GCS URL for a fixture`, 77 }) 78 79 var fixturesLoadImportShared = pflag.NewFlagSet(`load/import`, pflag.ContinueOnError) 80 var fixturesMakeImportShared = pflag.NewFlagSet(`load/import`, pflag.ContinueOnError) 81 82 var fixturesMakeImportCSVServerURL = fixturesMakeImportShared.String( 83 `csv-server`, ``, 84 `Skip saving CSVs to cloud storage, instead get them from a 'csv-server' running at this url`) 85 86 var fixturesMakeOnlyTable = fixturesMakeCmd.PersistentFlags().String( 87 `only-tables`, ``, 88 `Only load the tables with the given comma-separated names`) 89 90 var fixturesMakeFilesPerNode = fixturesMakeCmd.PersistentFlags().Int( 91 `files-per-node`, 1, 92 `number of file URLs to generate per node when using csv-server`) 93 94 var fixturesImportFilesPerNode = fixturesImportCmd.PersistentFlags().Int( 95 `files-per-node`, 1, 96 `number of file URLs to generate per node`) 97 98 var fixturesRunChecks = fixturesLoadImportShared.Bool( 99 `checks`, true, `Run validity checks on the loaded fixture`) 100 101 var fixturesImportInjectStats = fixturesImportCmd.PersistentFlags().Bool( 102 `inject-stats`, true, `Inject pre-calculated statistics if they are available`) 103 104 var gcsBucketOverride, gcsPrefixOverride, gcsBillingProjectOverride *string 105 106 func init() { 107 gcsBucketOverride = fixturesCmd.PersistentFlags().String(`gcs-bucket-override`, ``, ``) 108 gcsPrefixOverride = fixturesCmd.PersistentFlags().String(`gcs-prefix-override`, ``, ``) 109 _ = fixturesCmd.PersistentFlags().MarkHidden(`gcs-bucket-override`) 110 _ = fixturesCmd.PersistentFlags().MarkHidden(`gcs-prefix-override`) 111 112 gcsBillingProjectOverride = fixturesCmd.PersistentFlags().String( 113 `gcs-billing-project`, ``, 114 `Google Cloud project to use for storage billing; `+ 115 `required to be non-empty if the bucket is requestor pays`) 116 } 117 118 const storageError = `failed to create google cloud client ` + 119 `(You may need to setup the GCS application default credentials: ` + 120 `'gcloud auth application-default login --project=cockroach-shared')` 121 122 // getStorage returns a GCS client using "application default" credentials. The 123 // caller is responsible for closing it. 124 func getStorage(ctx context.Context) (*storage.Client, error) { 125 // TODO(dan): Right now, we don't need all the complexity of 126 // cloud.ExternalStorage, but if we start supporting more than just GCS, 127 // this should probably be switched to it. 128 g, err := storage.NewClient(ctx, option.WithScopes(storage.ScopeReadWrite)) 129 return g, errors.Wrap(err, storageError) 130 } 131 132 func init() { 133 workloadcli.AddSubCmd(func(userFacing bool) *cobra.Command { 134 for _, meta := range workload.Registered() { 135 gen := meta.New() 136 var genFlags *pflag.FlagSet 137 if f, ok := gen.(workload.Flagser); ok { 138 genFlags = f.Flags().FlagSet 139 // Hide runtime-only flags so they don't clutter up the help text, 140 // but don't remove them entirely so if someone switches from 141 // `./workload run` to `./workload fixtures` they don't have to 142 // remove them from the invocation. 143 for flagName, meta := range f.Flags().Meta { 144 if meta.RuntimeOnly || meta.CheckConsistencyOnly { 145 _ = genFlags.MarkHidden(flagName) 146 } 147 } 148 } 149 150 genMakeCmd := workloadcli.SetCmdDefaults(&cobra.Command{ 151 Use: meta.Name + ` [CRDB URI]`, 152 Args: cobra.RangeArgs(0, 1), 153 }) 154 genMakeCmd.Flags().AddFlagSet(genFlags) 155 genMakeCmd.Flags().AddFlagSet(fixturesMakeImportShared) 156 genMakeCmd.Run = workloadcli.CmdHelper(gen, fixturesMake) 157 fixturesMakeCmd.AddCommand(genMakeCmd) 158 159 genLoadCmd := workloadcli.SetCmdDefaults(&cobra.Command{ 160 Use: meta.Name + ` [CRDB URI]`, 161 Args: cobra.RangeArgs(0, 1), 162 }) 163 genLoadCmd.Flags().AddFlagSet(genFlags) 164 genLoadCmd.Flags().AddFlagSet(fixturesLoadImportShared) 165 genLoadCmd.Run = workloadcli.CmdHelper(gen, fixturesLoad) 166 fixturesLoadCmd.AddCommand(genLoadCmd) 167 168 genImportCmd := workloadcli.SetCmdDefaults(&cobra.Command{ 169 Use: meta.Name + ` [CRDB URI]`, 170 Args: cobra.RangeArgs(0, 1), 171 }) 172 genImportCmd.Flags().AddFlagSet(genFlags) 173 genImportCmd.Flags().AddFlagSet(fixturesLoadImportShared) 174 genImportCmd.Flags().AddFlagSet(fixturesMakeImportShared) 175 genImportCmd.Run = workloadcli.CmdHelper(gen, fixturesImport) 176 fixturesImportCmd.AddCommand(genImportCmd) 177 178 genURLCmd := workloadcli.SetCmdDefaults(&cobra.Command{ 179 Use: meta.Name, 180 Args: cobra.NoArgs, 181 }) 182 genURLCmd.Flags().AddFlagSet(genFlags) 183 genURLCmd.Run = fixturesURL(gen) 184 fixturesURLCmd.AddCommand(genURLCmd) 185 } 186 fixturesCmd.AddCommand(fixturesListCmd) 187 fixturesCmd.AddCommand(fixturesMakeCmd) 188 fixturesCmd.AddCommand(fixturesLoadCmd) 189 fixturesCmd.AddCommand(fixturesImportCmd) 190 fixturesCmd.AddCommand(fixturesURLCmd) 191 return fixturesCmd 192 }) 193 } 194 195 func fixturesList(_ *cobra.Command, _ []string) error { 196 ctx := context.Background() 197 gcs, err := getStorage(ctx) 198 if err != nil { 199 return err 200 } 201 defer func() { _ = gcs.Close() }() 202 fixtures, err := workloadccl.ListFixtures(ctx, gcs, config()) 203 if err != nil { 204 return err 205 } 206 for _, fixture := range fixtures { 207 fmt.Println(fixture) 208 } 209 return nil 210 } 211 212 type filteringGenerator struct { 213 gen workload.Generator 214 filter map[string]struct{} 215 } 216 217 func (f filteringGenerator) Meta() workload.Meta { 218 return f.gen.Meta() 219 } 220 221 func (f filteringGenerator) Tables() []workload.Table { 222 ret := make([]workload.Table, 0) 223 for _, t := range f.gen.Tables() { 224 if _, ok := f.filter[t.Name]; ok { 225 ret = append(ret, t) 226 } 227 } 228 return ret 229 } 230 231 func fixturesMake(gen workload.Generator, urls []string, _ string) error { 232 ctx := context.Background() 233 gcs, err := getStorage(ctx) 234 if err != nil { 235 return err 236 } 237 defer func() { _ = gcs.Close() }() 238 239 sqlDB, err := gosql.Open(`cockroach`, strings.Join(urls, ` `)) 240 if err != nil { 241 return err 242 } 243 if *fixturesMakeOnlyTable != "" { 244 tableNames := strings.Split(*fixturesMakeOnlyTable, ",") 245 if len(tableNames) == 0 { 246 return errors.New("no table names specified") 247 } 248 filter := make(map[string]struct{}, len(tableNames)) 249 for _, tableName := range tableNames { 250 filter[tableName] = struct{}{} 251 } 252 gen = filteringGenerator{ 253 gen: gen, 254 filter: filter, 255 } 256 } 257 filesPerNode := *fixturesMakeFilesPerNode 258 fixture, err := workloadccl.MakeFixture(ctx, sqlDB, gcs, config(), gen, filesPerNode) 259 if err != nil { 260 return err 261 } 262 for _, table := range fixture.Tables { 263 log.Infof(ctx, `stored backup %s`, table.BackupURI) 264 } 265 return nil 266 } 267 268 // restoreDataLoader is an InitialDataLoader implementation that loads data with 269 // RESTORE. 270 type restoreDataLoader struct { 271 fixture workloadccl.Fixture 272 database string 273 } 274 275 // InitialDataLoad implements the InitialDataLoader interface. 276 func (l restoreDataLoader) InitialDataLoad( 277 ctx context.Context, db *gosql.DB, gen workload.Generator, 278 ) (int64, error) { 279 log.Infof(ctx, "starting restore of %d tables", len(gen.Tables())) 280 start := timeutil.Now() 281 bytes, err := workloadccl.RestoreFixture(ctx, db, l.fixture, l.database, true /* injectStats */) 282 if err != nil { 283 return 0, errors.Wrap(err, `restoring fixture`) 284 } 285 elapsed := timeutil.Since(start) 286 log.Infof(ctx, "restored %s bytes in %d tables (took %s, %s)", 287 humanizeutil.IBytes(bytes), len(gen.Tables()), elapsed, humanizeutil.DataRate(bytes, elapsed)) 288 return bytes, nil 289 } 290 291 func fixturesLoad(gen workload.Generator, urls []string, dbName string) error { 292 ctx := context.Background() 293 gcs, err := getStorage(ctx) 294 if err != nil { 295 return err 296 } 297 defer func() { _ = gcs.Close() }() 298 299 sqlDB, err := gosql.Open(`cockroach`, strings.Join(urls, ` `)) 300 if err != nil { 301 return err 302 } 303 if _, err := sqlDB.Exec(`CREATE DATABASE IF NOT EXISTS ` + dbName); err != nil { 304 return err 305 } 306 307 fixture, err := workloadccl.GetFixture(ctx, gcs, config(), gen) 308 if err != nil { 309 return errors.Wrap(err, `finding fixture`) 310 } 311 312 l := restoreDataLoader{fixture: fixture, database: dbName} 313 if _, err := workloadsql.Setup(ctx, sqlDB, gen, l); err != nil { 314 return err 315 } 316 317 if hooks, ok := gen.(workload.Hookser); *fixturesRunChecks && ok { 318 if consistencyCheckFn := hooks.Hooks().CheckConsistency; consistencyCheckFn != nil { 319 log.Info(ctx, "fixture is imported; now running consistency checks (ctrl-c to abort)") 320 if err := consistencyCheckFn(ctx, sqlDB); err != nil { 321 return err 322 } 323 } 324 } 325 326 return nil 327 } 328 329 func fixturesImport(gen workload.Generator, urls []string, dbName string) error { 330 ctx := context.Background() 331 sqlDB, err := gosql.Open(`cockroach`, strings.Join(urls, ` `)) 332 if err != nil { 333 return err 334 } 335 if _, err := sqlDB.Exec(`CREATE DATABASE IF NOT EXISTS ` + dbName); err != nil { 336 return err 337 } 338 339 l := workloadccl.ImportDataLoader{ 340 FilesPerNode: *fixturesImportFilesPerNode, 341 InjectStats: *fixturesImportInjectStats, 342 CSVServer: *fixturesMakeImportCSVServerURL, 343 } 344 if _, err := workloadsql.Setup(ctx, sqlDB, gen, l); err != nil { 345 return err 346 } 347 348 if hooks, ok := gen.(workload.Hookser); *fixturesRunChecks && ok { 349 if consistencyCheckFn := hooks.Hooks().CheckConsistency; consistencyCheckFn != nil { 350 log.Info(ctx, "fixture is restored; now running consistency checks (ctrl-c to abort)") 351 if err := consistencyCheckFn(ctx, sqlDB); err != nil { 352 return err 353 } 354 } 355 } 356 357 return nil 358 } 359 360 func fixturesURL(gen workload.Generator) func(*cobra.Command, []string) { 361 return workloadcli.HandleErrs(func(*cobra.Command, []string) error { 362 if h, ok := gen.(workload.Hookser); ok { 363 if err := h.Hooks().Validate(); err != nil { 364 return err 365 } 366 } 367 368 fmt.Println(workloadccl.FixtureURL(config(), gen)) 369 return nil 370 }) 371 }