github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cli/demo_cluster.go

     1  // Copyright 2020 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package cli
    12  
    13  import (
    14  	"context"
    15  	gosql "database/sql"
    16  	"fmt"
    17  	"io"
    18  	"io/ioutil"
    19  	"net/url"
    20  	"os"
    21  	"path/filepath"
    22  	"strconv"
    23  	"time"
    24  
    25  	"github.com/cockroachdb/cockroach/pkg/base"
    26  	"github.com/cockroachdb/cockroach/pkg/cli/cliflags"
    27  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    28  	"github.com/cockroachdb/cockroach/pkg/rpc"
    29  	"github.com/cockroachdb/cockroach/pkg/security"
    30  	"github.com/cockroachdb/cockroach/pkg/server"
    31  	"github.com/cockroachdb/cockroach/pkg/server/serverpb"
    32  	"github.com/cockroachdb/cockroach/pkg/server/status"
    33  	"github.com/cockroachdb/cockroach/pkg/sql"
    34  	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    35  	"github.com/cockroachdb/cockroach/pkg/util/log"
    36  	"github.com/cockroachdb/cockroach/pkg/util/log/logflags"
    37  	"github.com/cockroachdb/cockroach/pkg/util/stop"
    38  	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
    39  	"github.com/cockroachdb/cockroach/pkg/workload"
    40  	"github.com/cockroachdb/cockroach/pkg/workload/histogram"
    41  	"github.com/cockroachdb/cockroach/pkg/workload/workloadsql"
    42  	"github.com/cockroachdb/errors"
    43  	"github.com/spf13/cobra"
    44  	"golang.org/x/time/rate"
    45  )
    46  
    47  type transientCluster struct {
    48  	connURL    string
    49  	demoDir    string
    50  	useSockets bool
    51  	stopper    *stop.Stopper
    52  	s          *server.TestServer
    53  	servers    []*server.TestServer
    54  }
    55  
    56  func setupTransientCluster(
    57  	ctx context.Context, cmd *cobra.Command, gen workload.Generator,
    58  ) (c transientCluster, err error) {
    59  	// useSockets is true on unix, false on windows.
    60  	c.useSockets = useUnixSocketsInDemo()
    61  
    62  	// The user specified some localities for their nodes.
    63  	if len(demoCtx.localities) != 0 {
    64  		// Error out if localities don't line up with the requested node
    65  		// count before doing any sort of setup.
    66  		if len(demoCtx.localities) != demoCtx.nodes {
    67  			return c, errors.Errorf("number of localities specified must equal number of nodes")
    68  		}
    69  	} else {
    70  		demoCtx.localities = make([]roachpb.Locality, demoCtx.nodes)
    71  		for i := 0; i < demoCtx.nodes; i++ {
    72  			demoCtx.localities[i] = defaultLocalities[i%len(defaultLocalities)]
    73  		}
    74  	}
    75  
    76  	// Set up logging. For demo/transient server we use non-standard
    77  	// behavior where we avoid file creation if possible.
    78  	fl := flagSetForCmd(cmd)
    79  	df := fl.Lookup(cliflags.LogDir.Name)
    80  	sf := fl.Lookup(logflags.LogToStderrName)
    81  	if !df.Changed && !sf.Changed {
    82  		// User did not request logging flags; shut down all logging.
    83  		// Otherwise, the demo command would cause a cockroach-data
    84  		// directory to appear in the current directory just for logs.
    85  		_ = df.Value.Set("")
    86  		df.Changed = true
    87  		_ = sf.Value.Set(log.Severity_NONE.String())
    88  		sf.Changed = true
    89  	}
    90  	c.stopper, err = setupAndInitializeLoggingAndProfiling(ctx, cmd)
    91  	if err != nil {
    92  		return c, err
    93  	}
    94  	maybeWarnMemSize(ctx)
    95  
    96  	// Create a temporary directory for certificates (if secure) and
    97  	// the unix sockets.
    98  	// The directory is removed in the cleanup() method.
    99  	if c.demoDir, err = ioutil.TempDir("", "demo"); err != nil {
   100  		return c, err
   101  	}
   102  
   103  	if !demoCtx.insecure {
   104  		if err := generateCerts(c.demoDir); err != nil {
   105  			return c, err
   106  		}
   107  	}
   108  
   109  	serverFactory := server.TestServerFactory
   110  	var servers []*server.TestServer
   111  
   112  	// latencyMapWaitCh is used to block test servers after RPC address computation until the artificial
   113  	// latency map has been constructed.
   114  	latencyMapWaitCh := make(chan struct{})
   115  
   116  	// errCh is used to catch all errors when initializing servers.
   117  	// Sending a nil on this channel indicates success.
   118  	errCh := make(chan error, demoCtx.nodes)
   119  
   120  	for i := 0; i < demoCtx.nodes; i++ {
   121  		// All the nodes connect to the address of the first server created.
   122  		var joinAddr string
   123  		if c.s != nil {
   124  			joinAddr = c.s.ServingRPCAddr()
   125  		}
   126  		nodeID := roachpb.NodeID(i + 1)
   127  		args := testServerArgsForTransientCluster(c.sockForServer(nodeID), nodeID, joinAddr, c.demoDir)
   128  
   129  		// servRPCReadyCh is used if latency simulation is requested to notify that a test server has
   130  		// successfully computed its RPC address.
   131  		servRPCReadyCh := make(chan struct{})
   132  
   133  		if demoCtx.simulateLatency {
   134  			args.Knobs = base.TestingKnobs{
   135  				Server: &server.TestingKnobs{
   136  					PauseAfterGettingRPCAddress:  latencyMapWaitCh,
   137  					SignalAfterGettingRPCAddress: servRPCReadyCh,
   138  					ContextTestingKnobs: rpc.ContextTestingKnobs{
   139  						ArtificialLatencyMap: make(map[string]int),
   140  					},
   141  				},
   142  			}
   143  		}
   144  
   145  		serv := serverFactory.New(args).(*server.TestServer)
   146  
   147  		if i == 0 {
   148  			c.s = serv
   149  		}
   150  		servers = append(servers, serv)
   151  
   152  		// We force a wait until each server signals that it is ready.
   153  		servReadyFnCh := make(chan struct{})
   154  		serv.Cfg.ReadyFn = func(bool) {
   155  			close(servReadyFnCh)
   156  		}
   157  
   158  		// If latency simulation is requested, start the server in a background goroutine. We do this because
   159  		// the start routine needs to wait for the latency map to be constructed after its RPC address has been computed.
   160  		if demoCtx.simulateLatency {
   161  			go func(i int) {
   162  				if err := serv.Start(args); err != nil {
   163  					errCh <- err
   164  				} else {
   165  					// Block until the ReadyFn has been called before continuing.
   166  					<-servReadyFnCh
   167  					errCh <- nil
   168  				}
   169  			}(i)
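        			// Block until this server has computed its RPC address: the first
        			// server's address is the join target for the other nodes, and the
        			// latency map assembled below needs every server's address.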
   170  			<-servRPCReadyCh
   171  		} else {
   172  			if err := serv.Start(args); err != nil {
   173  				return c, err
   174  			}
   175  			// Block until the ReadyFn has been called before continuing.
   176  			<-servReadyFnCh
   177  			errCh <- nil
   178  		}
   179  
   180  		c.stopper.AddCloser(stop.CloserFn(serv.Stop))
   181  		// Ensure we close all sticky stores we've created.
   182  		for _, store := range args.StoreSpecs {
   183  			if store.StickyInMemoryEngineID != "" {
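        				// Copy the engine ID into a local variable so the closure below
        				// does not capture the loop variable.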
   184  				engineID := store.StickyInMemoryEngineID
   185  				c.stopper.AddCloser(stop.CloserFn(func() {
   186  					if err := server.CloseStickyInMemEngine(engineID); err != nil {
   187  						// Something else may have already closed the sticky store.
   188  						// Since we are just a closer here, it doesn't really matter.
   189  						log.Warningf(
   190  							ctx,
   191  							"could not close sticky in-memory store %s: %+v",
   192  							engineID,
   193  							err,
   194  						)
   195  					}
   196  				}))
   197  			}
   198  		}
   199  	}
   200  
   201  	c.servers = servers
   202  
   203  	if demoCtx.simulateLatency {
   204  		// Now, all servers have been started enough to know their own RPC serving
   205  		// addresses, but nothing else. Assemble the artificial latency map.
   206  		for i, src := range servers {
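        			// Each map entry is keyed by a destination server's RPC address; the
        			// latency values come from the regionToRegionToLatency table.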
   207  			latencyMap := src.Cfg.TestingKnobs.Server.(*server.TestingKnobs).ContextTestingKnobs.ArtificialLatencyMap
   208  			srcLocality, ok := src.Cfg.Locality.Find("region")
   209  			if !ok {
   210  				continue
   211  			}
   212  			srcLocalityMap, ok := regionToRegionToLatency[srcLocality]
   213  			if !ok {
   214  				continue
   215  			}
   216  			for j, dst := range servers {
   217  				if i == j {
   218  					continue
   219  				}
   220  				dstLocality, ok := dst.Cfg.Locality.Find("region")
   221  				if !ok {
   222  					continue
   223  				}
   224  				latency := srcLocalityMap[dstLocality]
   225  				latencyMap[dst.ServingRPCAddr()] = latency
   226  			}
   227  		}
   228  	}
   229  
   230  	// We've assembled our latency maps and are ready for all servers to proceed
   231  	// through bootstrapping.
   232  	close(latencyMapWaitCh)
   233  
   234  	// Wait for all servers to respond.
   235  	{
   236  		timeRemaining := maxNodeInitTime
   237  		lastUpdateTime := timeutil.Now()
   238  		var err error
   239  		for i := 0; i < demoCtx.nodes; i++ {
   240  			select {
   241  			case e := <-errCh:
   242  				err = errors.CombineErrors(err, e)
   243  			case <-time.After(timeRemaining):
   244  				return c, errors.New("failed to set up transientCluster in time")
   245  			}
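        			// Charge the time spent waiting for this server against the overall
        			// budget, so the total wait across all nodes stays within maxNodeInitTime.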
   246  			updateTime := timeutil.Now()
   247  			timeRemaining -= updateTime.Sub(lastUpdateTime)
   248  			lastUpdateTime = updateTime
   249  		}
   250  		if err != nil {
   251  			return c, err
   252  		}
   253  	}
   254  
   255  	// Create the root password if running in secure mode. We'll
   256  	// need that for the URL.
   257  	if !demoCtx.insecure {
   258  		if err := c.setupUserAuth(ctx); err != nil {
   259  			return c, err
   260  		}
   261  	}
   262  
   263  	if demoCtx.nodes < 3 {
   264  		// Set up the default zone configuration. We are using an in-memory store
   265  		// so we really want to disable replication.
   266  		if err := cliDisableReplication(ctx, c.s.Server); err != nil {
   267  			return c, err
   268  		}
   269  	}
   270  
   271  	// Prepare the URL for use by the SQL shell.
   272  	c.connURL, err = c.getNetworkURLForServer(0, gen, true /* includeAppName */)
   273  	if err != nil {
   274  		return c, err
   275  	}
   276  
   277  	// Start up the update check loop.
   278  	// We don't do this in (*server.Server).Start() because we don't want it
   279  	// in tests.
   280  	if !demoCtx.disableTelemetry {
   281  		c.s.PeriodicallyCheckForUpdates(ctx)
   282  	}
   283  	return c, nil
   284  }
   285  
   286  // testServerArgsForTransientCluster creates the test arguments for
   287  // a server in the demo cluster.
   288  func testServerArgsForTransientCluster(
   289  	sock unixSocketDetails, nodeID roachpb.NodeID, joinAddr string, demoDir string,
   290  ) base.TestServerArgs {
   291  	// Assign a sticky in-memory engine ID to the store spec so the store's data survives node restarts.
   292  	storeSpec := base.DefaultTestStoreSpec
   293  	storeSpec.StickyInMemoryEngineID = fmt.Sprintf("demo-node%d", nodeID)
   294  
   295  	args := base.TestServerArgs{
   296  		SocketFile:        sock.filename(),
   297  		PartOfCluster:     true,
   298  		Stopper:           stop.NewStopper(),
   299  		JoinAddr:          joinAddr,
   300  		DisableTLSForHTTP: true,
   301  		StoreSpecs:        []base.StoreSpec{storeSpec},
   302  		SQLMemoryPoolSize: demoCtx.sqlPoolMemorySize,
   303  		CacheSize:         demoCtx.cacheSize,
   304  	}
   305  
   306  	if demoCtx.localities != nil {
   307  		args.Locality = demoCtx.localities[int(nodeID-1)]
   308  	}
   309  	if demoCtx.insecure {
   310  		args.Insecure = true
   311  	} else {
   312  		args.Insecure = false
   313  		args.SSLCertsDir = demoDir
   314  	}
   315  
   316  	return args
   317  }
   318  
   319  func (c *transientCluster) cleanup(ctx context.Context) {
   320  	if c.stopper != nil {
   321  		c.stopper.Stop(ctx)
   322  	}
   323  	if c.demoDir != "" {
   324  		if err := checkAndMaybeShout(os.RemoveAll(c.demoDir)); err != nil {
   325  			// There's nothing more we can do if the removal fails, so ignore the error.
   326  			_ = err
   327  		}
   328  	}
   329  }
   330  
   331  // DrainAndShutdown will gracefully attempt to drain a node in the cluster, and
   332  // then shut it down.
   333  func (c *transientCluster) DrainAndShutdown(nodeID roachpb.NodeID) error {
   334  	nodeIndex := int(nodeID - 1)
   335  
   336  	if nodeIndex < 0 || nodeIndex >= len(c.servers) {
   337  		return errors.Errorf("node %d does not exist", nodeID)
   338  	}
   339  	// Shutting down the first node could be made possible by re-assigning c.s
   340  	// and pointing the other nodes to the new base node.
   341  	if nodeIndex == 0 {
   342  		return errors.Errorf("cannot shutdown node %d", nodeID)
   343  	}
   344  	if c.servers[nodeIndex] == nil {
   345  		return errors.Errorf("node %d is already shut down", nodeID)
   346  	}
   347  
   348  	ctx, cancel := context.WithCancel(context.Background())
   349  	defer cancel()
   350  
   351  	adminClient, finish, err := getAdminClient(ctx, *(c.servers[nodeIndex].Cfg))
   352  	if err != nil {
   353  		return err
   354  	}
   355  	defer finish()
   356  
   357  	if err := drainAndShutdown(ctx, adminClient); err != nil {
   358  		return err
   359  	}
   360  	c.servers[nodeIndex] = nil
   361  	return nil
   362  }
   363  
   364  // CallDecommission calls the Decommission RPC on a node.
   365  func (c *transientCluster) CallDecommission(nodeID roachpb.NodeID, decommissioning bool) error {
   366  	nodeIndex := int(nodeID - 1)
   367  
   368  	if nodeIndex < 0 || nodeIndex >= len(c.servers) {
   369  		return errors.Errorf("node %d does not exist", nodeID)
   370  	}
   371  
   372  	req := &serverpb.DecommissionRequest{
   373  		NodeIDs:         []roachpb.NodeID{nodeID},
   374  		Decommissioning: decommissioning,
   375  	}
   376  
   377  	ctx, cancel := context.WithCancel(context.Background())
   378  	defer cancel()
   379  
   380  	adminClient, finish, err := getAdminClient(ctx, *(c.s.Cfg))
   381  	if err != nil {
   382  		return err
   383  	}
   384  
   385  	defer finish()
   386  	_, err = adminClient.Decommission(ctx, req)
   387  	if err != nil {
   388  		return errors.Wrap(err, "while trying to mark as decommissioning")
   389  	}
   390  	return nil
   391  }
   392  
   393  // RestartNode will bring back a node in the cluster.
   394  // The node must have been shut down beforehand.
   395  // The node will restart, reconnecting to the same sticky in-memory store.
   396  func (c *transientCluster) RestartNode(nodeID roachpb.NodeID) error {
   397  	nodeIndex := int(nodeID - 1)
   398  
   399  	if nodeIndex < 0 || nodeIndex >= len(c.servers) {
   400  		return errors.Errorf("node %d does not exist", nodeID)
   401  	}
   402  	if c.servers[nodeIndex] != nil {
   403  		return errors.Errorf("node %d is already running", nodeID)
   404  	}
   405  
   406  	// TODO(#42243): re-compute the latency mapping.
   407  	args := testServerArgsForTransientCluster(c.sockForServer(nodeID), nodeID, c.s.ServingRPCAddr(), c.demoDir)
   408  	serv := server.TestServerFactory.New(args).(*server.TestServer)
   409  
   410  	// We want to only return after the server is ready.
   411  	readyCh := make(chan struct{})
   412  	serv.Cfg.ReadyFn = func(bool) {
   413  		close(readyCh)
   414  	}
   415  
   416  	if err := serv.Start(args); err != nil {
   417  		return err
   418  	}
   419  
   420  	// Wait until the server is ready for action.
   421  	select {
   422  	case <-readyCh:
   423  	case <-time.After(maxNodeInitTime):
   424  		return errors.Newf("could not initialize node %d in time", nodeID)
   425  	}
   426  
   427  	c.stopper.AddCloser(stop.CloserFn(serv.Stop))
   428  	c.servers[nodeIndex] = serv
   429  	return nil
   430  }
   431  
   432  func maybeWarnMemSize(ctx context.Context) {
   433  	if maxMemory, err := status.GetTotalMemory(ctx); err == nil {
   434  		requestedMem := (demoCtx.cacheSize + demoCtx.sqlPoolMemorySize) * int64(demoCtx.nodes)
   435  		maxRecommendedMem := int64(.75 * float64(maxMemory))
   436  		if requestedMem > maxRecommendedMem {
   437  			log.Shoutf(
   438  				ctx,
   439  				log.Severity_WARNING,
   440  				`HIGH MEMORY USAGE
   441  The sum of --max-sql-memory (%s) and --cache (%s) multiplied by the
   442  number of nodes (%d) results in potentially high memory usage on your
   443  device.
   444  This server is running at increased risk of memory-related failures.`,
   445  				demoNodeSQLMemSizeValue,
   446  				demoNodeCacheSizeValue,
   447  				demoCtx.nodes,
   448  			)
   449  		}
   450  	}
   451  }
   452  
   453  // generateCerts generates some temporary certificates for cockroach demo.
   454  func generateCerts(certsDir string) (err error) {
   455  	caKeyPath := filepath.Join(certsDir, security.EmbeddedCAKey)
   456  	// Create the CA key pair.
   457  	if err := security.CreateCAPair(
   458  		certsDir,
   459  		caKeyPath,
   460  		defaultKeySize,
   461  		defaultCALifetime,
   462  		false, /* allowKeyReuse */
   463  		false, /* overwrite */
   464  	); err != nil {
   465  		return err
   466  	}
   467  	// Generate a certificate for the demo nodes.
   468  	if err := security.CreateNodePair(
   469  		certsDir,
   470  		caKeyPath,
   471  		defaultKeySize,
   472  		defaultCertLifetime,
   473  		false, /* overwrite */
   474  		[]string{"127.0.0.1"},
   475  	); err != nil {
   476  		return err
   477  	}
   478  	// Create a certificate for the root user.
   479  	return security.CreateClientPair(
   480  		certsDir,
   481  		caKeyPath,
   482  		defaultKeySize,
   483  		defaultCertLifetime,
   484  		false, /* overwrite */
   485  		security.RootUser,
   486  		false, /* generatePKCS8Key */
   487  	)
   488  }
   489  
   490  func (c *transientCluster) getNetworkURLForServer(
   491  	serverIdx int, gen workload.Generator, includeAppName bool,
   492  ) (string, error) {
   493  	options := url.Values{}
   494  	if includeAppName {
   495  		options.Add("application_name", sqlbase.ReportableAppNamePrefix+"cockroach demo")
   496  	}
   497  	sqlURL := url.URL{
   498  		Scheme: "postgres",
   499  		Host:   c.servers[serverIdx].ServingSQLAddr(),
   500  	}
   501  	if gen != nil {
   502  		// The generator wants a particular database name to be
   503  		// pre-filled.
   504  		sqlURL.Path = gen.Meta().Name
   505  	}
   506  	// For a demo cluster we don't use client TLS certs and instead use
   507  	// password-based authentication with the password pre-filled in the
   508  	// URL.
   509  	if demoCtx.insecure {
   510  		sqlURL.User = url.User(security.RootUser)
   511  		options.Add("sslmode", "disable")
   512  	} else {
   513  		sqlURL.User = url.UserPassword(security.RootUser, defaultRootPassword)
   514  		options.Add("sslmode", "require")
   515  	}
   516  	sqlURL.RawQuery = options.Encode()
   517  	return sqlURL.String(), nil
   518  }
   519  
   520  func (c *transientCluster) setupUserAuth(ctx context.Context) error {
   521  	ie := c.s.InternalExecutor().(*sql.InternalExecutor)
   522  	_, err := ie.Exec(ctx, "set-root-password", nil, /* txn */
   523  		`ALTER USER $1 WITH PASSWORD $2`,
   524  		security.RootUser,
   525  		defaultRootPassword,
   526  	)
   527  	return err
   528  }
   529  
   530  func (c *transientCluster) setupWorkload(
   531  	ctx context.Context, gen workload.Generator, licenseDone <-chan error,
   532  ) error {
   533  	// If there is a load generator, create its database and load its
   534  	// fixture.
   535  	if gen != nil {
   536  		db, err := gosql.Open("postgres", c.connURL)
   537  		if err != nil {
   538  			return err
   539  		}
   540  		defer db.Close()
   541  
   542  		if _, err := db.Exec(`CREATE DATABASE ` + gen.Meta().Name); err != nil {
   543  			return err
   544  		}
   545  
   546  		ctx := context.TODO()
   547  		var l workloadsql.InsertsDataLoader
   548  		if cliCtx.isInteractive {
   549  			fmt.Printf("#\n# Beginning initialization of the %s dataset, please wait...\n", gen.Meta().Name)
   550  		}
   551  		if _, err := workloadsql.Setup(ctx, db, gen, l); err != nil {
   552  			return err
   553  		}
   554  		// Perform partitioning if requested by configuration.
   555  		if demoCtx.geoPartitionedReplicas {
   556  			// Wait until the license has been acquired to trigger partitioning.
   557  			if cliCtx.isInteractive {
   558  				fmt.Println("#\n# Waiting for license acquisition to complete...")
   559  			}
   560  			if err := waitForLicense(licenseDone); err != nil {
   561  				return err
   562  			}
   563  			if cliCtx.isInteractive {
   564  				fmt.Println("#\n# Partitioning the demo database, please wait...")
   565  			}
   566  
   567  			db, err := gosql.Open("postgres", c.connURL)
   568  			if err != nil {
   569  				return err
   570  			}
   571  			defer db.Close()
   572  			// Based on validation done in setup, we know that this workload has a partitioning step.
   573  			if err := gen.(workload.Hookser).Hooks().Partition(db); err != nil {
   574  				return errors.Wrapf(err, "partitioning the demo database")
   575  			}
   576  		}
   577  
   578  		// Run the workload. This must occur after partitioning the database.
   579  		if demoCtx.runWorkload {
   580  			var sqlURLs []string
   581  			for i := range c.servers {
   582  				sqlURL, err := c.getNetworkURLForServer(i, gen, true /* includeAppName */)
   583  				if err != nil {
   584  					return err
   585  				}
   586  				sqlURLs = append(sqlURLs, sqlURL)
   587  			}
   588  			if err := c.runWorkload(ctx, gen, sqlURLs); err != nil {
   589  				return errors.Wrapf(err, "starting background workload")
   590  			}
   591  		}
   592  	}
   593  
   594  	return nil
   595  }
   596  
   597  func (c *transientCluster) runWorkload(
   598  	ctx context.Context, gen workload.Generator, sqlUrls []string,
   599  ) error {
   600  	opser, ok := gen.(workload.Opser)
   601  	if !ok {
   602  		return errors.Errorf("default dataset %s does not have a workload defined", gen.Meta().Name)
   603  	}
   604  
   605  	// Dummy registry to provide to the Opser.
   606  	reg := histogram.NewRegistry(time.Duration(100) * time.Millisecond)
   607  	ops, err := opser.Ops(sqlUrls, reg)
   608  	if err != nil {
   609  		return errors.Wrap(err, "unable to create workload")
   610  	}
   611  
   612  	// Use a light rate limit of 25 queries per second
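        	// (burst size of one). The limiter is shared by all worker goroutines below.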
   613  	limiter := rate.NewLimiter(rate.Limit(25), 1)
   614  
   615  	// Start a goroutine to run each of the workload functions.
   616  	for _, workerFn := range ops.WorkerFns {
   617  		workloadFun := func(f func(context.Context) error) func(context.Context) {
   618  			return func(ctx context.Context) {
   619  				for {
   620  					// Limit how quickly we can generate work.
   621  					if err := limiter.Wait(ctx); err != nil {
   622  					// When the limiter returns an error, panic because we don't
   623  						// expect any errors from it.
   624  						panic(err)
   625  					}
   626  					if err := f(ctx); err != nil {
   627  					// Only log the error and return when the workload function returns
   628  					// an error, because these errors should be ignored and should not
   629  					// interrupt the rest of the demo.
   630  						log.Warningf(ctx, "Error running workload query: %+v\n", err)
   631  						return
   632  					}
   633  				}
   634  			}
   635  		}
   636  		// As the SQL shell is tied to `c.s`, we also tie the workload to it so that
   637  		// the workload stops when that server dies, rather than when the whole
   638  		// cluster stops. Otherwise, interrupting cockroach demo would hang.
   639  		c.s.Stopper().RunWorker(ctx, workloadFun(workerFn))
   640  	}
   641  
   642  	return nil
   643  }
   644  
   645  // acquireDemoLicense begins an asynchronous process to obtain a
   646  // temporary demo license from the Cockroach Labs website. It returns
   647  // a channel that can be waited on if the caller needs to wait for
   648  // license acquisition to complete.
   649  func (c *transientCluster) acquireDemoLicense(ctx context.Context) (chan error, error) {
   650  	// Communicate information about license acquisition to services
   651  	// that depend on it.
   652  	licenseDone := make(chan error)
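        	// The channel is closed once acquisition succeeds or is skipped; a single
        	// error is sent on it if acquisition fails.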
   653  	if demoCtx.disableLicenseAcquisition {
   654  		// If we are not supposed to acquire a license, close the channel
   655  		// immediately so that future waiters don't hang.
   656  		close(licenseDone)
   657  	} else {
   658  		// If we allow telemetry, then also try to get an enterprise license for the demo.
   659  		// GetAndApplyLicense will be nil in the pure OSS/BSL build of cockroach.
   660  		db, err := gosql.Open("postgres", c.connURL)
   661  		if err != nil {
   662  			return nil, err
   663  		}
   664  		go func() {
   665  			defer db.Close()
   666  
   667  			success, err := GetAndApplyLicense(db, c.s.ClusterID(), demoOrg)
   668  			if err != nil {
   669  				licenseDone <- err
   670  				return
   671  			}
   672  			if !success {
   673  				if demoCtx.geoPartitionedReplicas {
   674  					licenseDone <- errors.WithDetailf(
   675  						errors.New("unable to acquire a license for this demo"),
   676  						"Enterprise features are needed for this demo (--%s).",
   677  						cliflags.DemoGeoPartitionedReplicas.Name)
   678  					return
   679  				}
   680  			}
   681  			close(licenseDone)
   682  		}()
   683  	}
   684  
   685  	return licenseDone, nil
   686  }
   687  
   688  // sockForServer generates the metadata for a unix socket for the given node.
   689  // For example, node 1 gets socket /tmpdemodir/.s.PGSQL.26257,
   690  // node 2 gets socket /tmpdemodir/.s.PGSQL.26258, etc.
   691  func (c *transientCluster) sockForServer(nodeID roachpb.NodeID) unixSocketDetails {
   692  	if !c.useSockets {
   693  		return unixSocketDetails{}
   694  	}
   695  	defaultPort, _ := strconv.Atoi(base.DefaultPort)
   696  	return unixSocketDetails{
   697  		socketDir:  c.demoDir,
   698  		portNumber: defaultPort + int(nodeID) - 1,
   699  	}
   700  }
   701  
   702  type unixSocketDetails struct {
   703  	socketDir  string
   704  	portNumber int
   705  }
   706  
   707  func (s unixSocketDetails) exists() bool {
   708  	return s.socketDir != ""
   709  }
   710  
   711  func (s unixSocketDetails) filename() string {
   712  	if !s.exists() {
   713  		// No socket configured.
   714  		return ""
   715  	}
   716  	return filepath.Join(s.socketDir, fmt.Sprintf(".s.PGSQL.%d", s.portNumber))
   717  }
   718  
   719  func (s unixSocketDetails) String() string {
   720  	options := url.Values{}
   721  	options.Add("host", s.socketDir)
   722  	options.Add("port", strconv.Itoa(s.portNumber))
   723  
   724  	// Note: in the generated unix socket URL, a password is always
   725  	// included even in insecure mode. This is OK because in insecure
   726  	// mode the password is not checked on the server.
   727  	sqlURL := url.URL{
   728  		Scheme:   "postgres",
   729  		User:     url.UserPassword(security.RootUser, defaultRootPassword),
   730  		RawQuery: options.Encode(),
   731  	}
   732  	return sqlURL.String()
   733  }
   734  
   735  func (c *transientCluster) listDemoNodes(w io.Writer, justOne bool) {
   736  	numNodesLive := 0
   737  	for i, s := range c.servers {
   738  		if s == nil {
   739  			continue
   740  		}
   741  		numNodesLive++
   742  		if numNodesLive > 1 && justOne {
   743  			// Demo introduction: we just want conn parameters for one node.
   744  			continue
   745  		}
   746  
   747  		nodeID := s.NodeID()
   748  		if !justOne {
   749  			// We skip the node ID if we're in the top level introduction of
   750  			// the demo.
   751  			fmt.Fprintf(w, "node %d:\n", nodeID)
   752  		}
   753  		// Print node ID and admin UI URL.
   754  		fmt.Fprintf(w, "  (console) %s\n", s.AdminURL())
   755  		// Print unix socket if defined.
   756  		if c.useSockets {
   757  			sock := c.sockForServer(nodeID)
   758  			fmt.Fprintln(w, "  (sql)    ", sock)
   759  		}
   760  		// Print network URL if defined.
   761  		netURL, err := c.getNetworkURLForServer(i, nil, false /*includeAppName*/)
   762  		if err != nil {
   763  			fmt.Fprintln(stderr, errors.Wrap(err, "retrieving network URL"))
   764  		} else {
   765  			fmt.Fprintln(w, "  (sql/tcp)", netURL)
   766  		}
   767  		fmt.Fprintln(w)
   768  	}
   769  	if numNodesLive == 0 {
   770  		fmt.Fprintln(w, "no demo nodes currently running")
   771  	}
   772  	if justOne && numNodesLive > 1 {
   773  		fmt.Fprintln(w, `To display connection parameters for other nodes, use \demo ls.`)
   774  	}
   775  }