github.com/filecoin-project/bacalhau@v0.3.23-0.20230228154132-45c989550ace/cmd/bacalhau/devstack.go (about) 1 package bacalhau 2 3 import ( 4 "fmt" 5 "os" 6 "path/filepath" 7 "strconv" 8 9 "github.com/filecoin-project/bacalhau/pkg/config" 10 "github.com/filecoin-project/bacalhau/pkg/devstack" 11 "github.com/filecoin-project/bacalhau/pkg/node" 12 "github.com/filecoin-project/bacalhau/pkg/system" 13 "github.com/filecoin-project/bacalhau/pkg/telemetry" 14 "github.com/filecoin-project/bacalhau/pkg/util/templates" 15 "k8s.io/kubectl/pkg/util/i18n" 16 17 "github.com/spf13/cobra" 18 ) 19 20 var ( 21 devStackLong = templates.LongDesc(i18n.T(` 22 Start a cluster of nodes and run a job on them. 23 `)) 24 25 //nolint:lll // Documentation 26 devstackExample = templates.Examples(i18n.T(` 27 # Create a devstack cluster with a single requester node and 3 compute nodes (Default values) 28 bacalhau devstack 29 30 # Create a devstack cluster with a two requester nodes and 10 compute nodes 31 bacalhau devstack --requester-nodes 2 --compute-nodes 10 32 33 # Create a devstack cluster with a single hybrid (requester and compute) nodes 34 bacalhau devstack --requester-nodes 0 --compute-nodes 0 --hybrid-nodes 1 35 `)) 36 ) 37 38 func newDevStackOptions() *devstack.DevStackOptions { 39 return &devstack.DevStackOptions{ 40 NumberOfRequesterOnlyNodes: 1, 41 NumberOfComputeOnlyNodes: 3, 42 NumberOfBadComputeActors: 0, 43 Peer: "", 44 PublicIPFSMode: false, 45 EstuaryAPIKey: os.Getenv("ESTUARY_API_KEY"), 46 LocalNetworkLotus: false, 47 SimulatorAddr: "", 48 SimulatorMode: false, 49 CPUProfilingFile: "", 50 MemoryProfilingFile: "", 51 } 52 } 53 54 func newDevStackCmd() *cobra.Command { 55 ODs := newDevStackOptions() 56 OS := NewServeOptions() 57 IsNoop := false 58 59 devstackCmd := &cobra.Command{ 60 Use: "devstack", 61 Short: "Start a cluster of bacalhau nodes for testing and development", 62 Long: devStackLong, 63 Example: devstackExample, 64 RunE: func(cmd *cobra.Command, _ []string) error { 65 return runDevstack(cmd, ODs, OS, IsNoop) 66 }, 67 } 68 69 devstackCmd.PersistentFlags().IntVar( 70 &ODs.NumberOfHybridNodes, "hybrid-nodes", ODs.NumberOfHybridNodes, 71 `How many hybrid (requester and compute) nodes should be started in the cluster`, 72 ) 73 devstackCmd.PersistentFlags().IntVar( 74 &ODs.NumberOfRequesterOnlyNodes, "requester-nodes", ODs.NumberOfRequesterOnlyNodes, 75 `How many requester only nodes should be started in the cluster`, 76 ) 77 devstackCmd.PersistentFlags().IntVar( 78 &ODs.NumberOfComputeOnlyNodes, "compute-nodes", ODs.NumberOfComputeOnlyNodes, 79 `How many compute only nodes should be started in the cluster`, 80 ) 81 devstackCmd.PersistentFlags().IntVar( 82 &ODs.NumberOfBadComputeActors, "bad-compute-actors", ODs.NumberOfBadComputeActors, 83 `How many compute nodes should be bad actors`, 84 ) 85 devstackCmd.PersistentFlags().IntVar( 86 &ODs.NumberOfBadRequesterActors, "bad-requester-actors", ODs.NumberOfBadRequesterActors, 87 `How many requester nodes should be bad actors`, 88 ) 89 devstackCmd.PersistentFlags().BoolVar( 90 &IsNoop, "noop", false, 91 `Use the noop executor and verifier for all jobs`, 92 ) 93 devstackCmd.PersistentFlags().StringVar( 94 &ODs.Peer, "peer", ODs.Peer, 95 `Connect node 0 to another network node`, 96 ) 97 devstackCmd.PersistentFlags().BoolVar( 98 &ODs.LocalNetworkLotus, "lotus-node", ODs.LocalNetworkLotus, 99 "Also start a Lotus FileCoin instance", 100 ) 101 devstackCmd.PersistentFlags().StringVar( 102 &ODs.SimulatorAddr, "simulator-addr", ODs.SimulatorAddr, 103 `Use the simulator transport at the given node multi addr`, 104 ) 105 devstackCmd.PersistentFlags().BoolVar( 106 &ODs.SimulatorMode, "simulator-mode", false, 107 `If set, one of the nodes will act as a simulator and will proxy all requests to the other nodes`, 108 ) 109 devstackCmd.PersistentFlags().BoolVar( 110 &ODs.PublicIPFSMode, "public-ipfs", ODs.PublicIPFSMode, 111 `Connect devstack to public IPFS`, 112 ) 113 devstackCmd.PersistentFlags().StringVar( 114 &ODs.CPUProfilingFile, "cpu-profiling-file", ODs.CPUProfilingFile, 115 "File to save CPU profiling to", 116 ) 117 devstackCmd.PersistentFlags().StringVar( 118 &ODs.MemoryProfilingFile, "memory-profiling-file", ODs.MemoryProfilingFile, 119 "File to save memory profiling to", 120 ) 121 122 setupJobSelectionCLIFlags(devstackCmd, OS) 123 setupCapacityManagerCLIFlags(devstackCmd, OS) 124 125 return devstackCmd 126 } 127 128 func runDevstack(cmd *cobra.Command, ODs *devstack.DevStackOptions, OS *ServeOptions, IsNoop bool) error { 129 ctx := cmd.Context() 130 131 cm := ctx.Value(systemManagerKey).(*system.CleanupManager) 132 133 if config.DevstackShouldWriteEnvFile() { 134 cm.RegisterCallback(cleanupDevstackDotEnv) 135 } 136 cm.RegisterCallback(telemetry.Cleanup) 137 138 config.DevstackSetShouldPrintInfo() 139 140 totalComputeNodes := ODs.NumberOfComputeOnlyNodes + ODs.NumberOfHybridNodes 141 totalRequesterNodes := ODs.NumberOfRequesterOnlyNodes + ODs.NumberOfHybridNodes 142 if ODs.NumberOfBadComputeActors > totalComputeNodes { 143 Fatal(cmd, fmt.Sprintf("You cannot have more bad compute actors (%d) than there are nodes (%d).", 144 ODs.NumberOfBadComputeActors, totalComputeNodes), 1) 145 } 146 if ODs.NumberOfBadRequesterActors > totalRequesterNodes { 147 Fatal(cmd, fmt.Sprintf("You cannot have more bad requester actors (%d) than there are nodes (%d).", 148 ODs.NumberOfBadRequesterActors, totalRequesterNodes), 1) 149 } 150 151 portFileName := filepath.Join(os.TempDir(), "bacalhau-devstack.port") 152 pidFileName := filepath.Join(os.TempDir(), "bacalhau-devstack.pid") 153 154 if _, ignore := os.LookupEnv("IGNORE_PID_AND_PORT_FILES"); !ignore { 155 _, err := os.Stat(portFileName) 156 if err == nil { 157 Fatal(cmd, fmt.Sprintf("Found file %s - Devstack likely already running", portFileName), 1) 158 } 159 _, err = os.Stat(pidFileName) 160 if err == nil { 161 Fatal(cmd, fmt.Sprintf("Found file %s - Devstack likely already running", pidFileName), 1) 162 } 163 } 164 165 computeConfig := getComputeConfig(OS) 166 if ODs.LocalNetworkLotus { 167 cmd.Println("Note that starting up the Lotus node can take many minutes!") 168 } 169 170 var stack *devstack.DevStack 171 var stackErr error 172 if IsNoop { 173 stack, stackErr = devstack.NewNoopDevStack(ctx, cm, *ODs, computeConfig, node.NewRequesterConfigWithDefaults()) 174 } else { 175 stack, stackErr = devstack.NewStandardDevStack(ctx, cm, *ODs, computeConfig, node.NewRequesterConfigWithDefaults()) 176 } 177 if stackErr != nil { 178 return stackErr 179 } 180 181 nodeInfoOutput, err := stack.PrintNodeInfo(ctx) 182 if err != nil { 183 Fatal(cmd, fmt.Sprintf("Failed to print node info: %s", err.Error()), 1) 184 } 185 cmd.Println(nodeInfoOutput) 186 187 f, err := os.Create(portFileName) 188 if err != nil { 189 Fatal(cmd, fmt.Sprintf("Error writing out port file to %v", portFileName), 1) 190 } 191 defer os.Remove(portFileName) 192 firstNode := stack.Nodes[0] 193 _, err = f.WriteString(strconv.Itoa(firstNode.APIServer.Port)) 194 if err != nil { 195 Fatal(cmd, fmt.Sprintf("Error writing out port file: %v", portFileName), 1) 196 } 197 198 fPid, err := os.Create(pidFileName) 199 if err != nil { 200 Fatal(cmd, fmt.Sprintf("Error writing out pid file to %v", pidFileName), 1) 201 } 202 defer os.Remove(pidFileName) 203 204 _, err = fPid.WriteString(strconv.Itoa(os.Getpid())) 205 if err != nil { 206 Fatal(cmd, fmt.Sprintf("Error writing out pid file: %v", pidFileName), 1) 207 } 208 209 <-ctx.Done() // block until killed 210 211 cmd.Println("Shutting down devstack") 212 return nil 213 } 214 215 func cleanupDevstackDotEnv() error { 216 if _, err := os.Stat(config.DevstackEnvFile()); err == nil { 217 return os.Remove(config.DevstackEnvFile()) 218 } 219 return nil 220 }