github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cli/init.go (about) 1 // Copyright 2017 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package cli 12 13 import ( 14 "context" 15 "fmt" 16 "os" 17 "strings" 18 "time" 19 20 "github.com/cockroachdb/cockroach/pkg/server" 21 "github.com/cockroachdb/cockroach/pkg/server/serverpb" 22 "github.com/cockroachdb/cockroach/pkg/util/contextutil" 23 "github.com/cockroachdb/cockroach/pkg/util/retry" 24 "github.com/cockroachdb/errors" 25 "github.com/spf13/cobra" 26 "google.golang.org/grpc" 27 ) 28 29 var initCmd = &cobra.Command{ 30 Use: "init", 31 Short: "initialize a cluster", 32 Long: ` 33 Perform one-time-only initialization of a CockroachDB cluster. 34 35 After starting one or more nodes with --join flags, run the init 36 command on one node (passing the same --host and certificate flags 37 you would use for the sql command). The target of the init command 38 must appear in the --join flags of other nodes. 39 40 A node started without the --join flag initializes itself as a 41 single-node cluster, so the init command is not used in that case. 42 `, 43 Args: cobra.NoArgs, 44 RunE: maybeShoutError(MaybeDecorateGRPCError(runInit)), 45 } 46 47 func runInit(cmd *cobra.Command, args []string) error { 48 ctx, cancel := context.WithCancel(context.Background()) 49 defer cancel() 50 51 // Wait for the node to be ready for initialization. 52 conn, finish, err := waitForClientReadinessAndGetClientGRPCConn(ctx) 53 if err != nil { 54 return err 55 } 56 defer finish() 57 58 // Actually perform cluster initialization. 59 c := serverpb.NewInitClient(conn) 60 61 if _, err = c.Bootstrap(ctx, &serverpb.BootstrapRequest{}); err != nil { 62 if strings.Contains(err.Error(), server.ErrClusterInitialized.Error()) { 63 // We really want to use errors.Is() here but this would require 64 // error serialization support in gRPC. 65 // This is not yet performed in CockroachDB even though 66 // the error library now has infrastructure to do so, see: 67 // https://github.com/cockroachdb/errors/pull/14 68 return errors.WithHint(err, 69 "Please ensure all your start commands are using --join.") 70 } 71 return err 72 } 73 74 fmt.Fprintln(os.Stdout, "Cluster successfully initialized") 75 return nil 76 } 77 78 // waitForClientReadinessAndGetClientGRPCConn waits for the node to 79 // be ready for initialization. This check ensures that the `init` 80 // command is less likely to fail because it was issued too 81 // early. In general, retrying the `init` command is dangerous [0], 82 // so we make a best effort at minimizing chances for users to 83 // arrive in an uncomfortable situation. 84 // 85 // [0]: https://github.com/cockroachdb/cockroach/pull/19753#issuecomment-341561452 86 func waitForClientReadinessAndGetClientGRPCConn( 87 ctx context.Context, 88 ) (conn *grpc.ClientConn, finish func(), err error) { 89 defer func() { 90 // If we're returning with an error, tear down the gRPC connection 91 // that's been established, if any. 92 if finish != nil && err != nil { 93 finish() 94 } 95 }() 96 97 retryOpts := retry.Options{InitialBackoff: time.Second, MaxBackoff: time.Second} 98 for r := retry.StartWithCtx(ctx, retryOpts); r.Next(); { 99 if err = contextutil.RunWithTimeout(ctx, "init-open-conn", 5*time.Second, 100 func(ctx context.Context) error { 101 // (Attempt to) establish the gRPC connection. If that fails, 102 // it may be that the server hasn't started to listen yet, in 103 // which case we'll retry. 104 conn, _, finish, err = getClientGRPCConn(ctx, serverCfg) 105 if err != nil { 106 return err 107 } 108 109 // Access the /health endpoint. Until/unless this succeeds, the 110 // node is not yet fully initialized and ready to accept 111 // Bootstrap requests. 112 ac := serverpb.NewAdminClient(conn) 113 _, err := ac.Health(ctx, &serverpb.HealthRequest{}) 114 return err 115 }); err != nil { 116 err = errors.Wrapf(err, "node not ready to perform cluster initialization") 117 fmt.Fprintln(stderr, "warning:", err, "(retrying)") 118 119 // We're going to retry; first cancel the connection that's 120 // been established, if any. 121 if finish != nil { 122 finish() 123 finish = nil 124 } 125 // Then retry. 126 continue 127 } 128 129 // No error - connection was established and health endpoint is 130 // ready. 131 return conn, finish, err 132 } 133 err = errors.New("maximum number of retries exceeded") 134 return 135 }