/*
Copyright 2015 The Perkeep Authors

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package cloudlaunch helps binaries run themselves on The Cloud, copying
// themselves to GCE.
package cloudlaunch // import "go4.org/cloud/cloudlaunch"

import (
	"encoding/json"
	"flag"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"net/http"
	"os"
	"path"
	"path/filepath"
	"runtime"
	"strings"
	"time"

	"go4.org/cloud/google/gceutil"

	"cloud.google.com/go/compute/metadata"
	"cloud.google.com/go/storage"
	"golang.org/x/net/context"
	"golang.org/x/oauth2"
	"golang.org/x/oauth2/google"
	compute "google.golang.org/api/compute/v1"
	"google.golang.org/api/googleapi"
	"google.golang.org/api/option"
	storageapi "google.golang.org/api/storage/v1"
)

// readFile returns the contents of the file named v with leading and
// trailing whitespace removed. It terminates the process via log.Fatalf if
// the file cannot be read.
func readFile(v string) string {
	slurp, err := ioutil.ReadFile(v)
	if err != nil {
		log.Fatalf("Error reading %s: %v", v, err)
	}
	return strings.TrimSpace(string(slurp))
}

// baseConfig is the cloud-config template passed to the instance as its
// "user-data" metadata. The $NAME, $URL and $REBOOT placeholders are
// substituted in createInstance. The systemd unit it declares downloads the
// binary from $URL into /opt/bin before every start and restarts it
// whenever it exits.
//
// NOTE(review): indentation inside this literal is significant to YAML;
// verify it against the upstream file if this text was recovered from a
// whitespace-mangled copy.
const baseConfig = `#cloud-config
coreos:
  update:
    group: stable
    reboot-strategy: $REBOOT
  units:
    - name: $NAME.service
      command: start
      content: |
        [Unit]
        Description=$NAME service
        After=network.target

        [Service]
        Type=simple
        ExecStartPre=/bin/sh -c 'mkdir -p /opt/bin && /usr/bin/curl --silent -f -o /opt/bin/$NAME $URL?$(date +%s) && chmod +x /opt/bin/$NAME'
        ExecStart=/opt/bin/$NAME
        RestartSec=10
        Restart=always
        StartLimitInterval=0

        [Install]
        WantedBy=network-online.target
`

// RestartPolicy controls whether the binary automatically restarts.
type RestartPolicy int

const (
	// RestartOnUpdates is the zero value: the running binary exits (and is
	// restarted by systemd) when the uploaded binary changes.
	RestartOnUpdates RestartPolicy = iota
	// RestartNever disables the update-watching loop.
	RestartNever
	// TODO: more graceful restarts; make systemd own listening on network sockets,
	// don't break connections.
)

// Config describes a service binary and the GCE instance it is deployed to.
type Config struct {
	// Name is the name of a service to run.
	// This is the name of the systemd service (without .service)
	// and the name of the GCE instance.
	Name string

	// RestartPolicy controls whether the binary automatically restarts
	// on updates. The zero value means automatic.
	RestartPolicy RestartPolicy

	// UpdateStrategy sets the CoreOS automatic update strategy, and the
	// associated reboots. Possible values are "best-effort", "etcd-lock",
	// "reboot", "off", with "best-effort" being the default. See
	// https://coreos.com/os/docs/latest/update-strategies.html
	UpdateStrategy string

	// BinaryBucket and BinaryObject are the GCS bucket and object
	// within that bucket containing the Linux binary to download
	// on boot and occasionally run. This binary must be public
	// (at least for now).
	BinaryBucket string
	BinaryObject string // defaults to Name

	// GCEProjectID is the GCE project to deploy into. It must be non-empty
	// when deploying; the service account key is read from
	// $HOME/keys/<GCEProjectID>.key.json.
	GCEProjectID string
	Zone         string // defaults to us-central1-f
	SSD          bool   // if true, the boot disk uses the pd-ssd disk type

	// Scopes are additional OAuth scopes. They are added to the scopes
	// requested when deploying and are the scopes granted to the
	// instance's default service account.
	Scopes []string // any additional scopes

	MachineType string // defaults to g1-small
	// NOTE(review): InstanceName is not read anywhere in this file; the
	// instance is currently named after Name.
	InstanceName string
}

// cloudLaunch is a launch of a Config.
126 type cloudLaunch struct { 127 *Config 128 oauthClient *http.Client 129 computeService *compute.Service 130 } 131 132 func (c *Config) binaryURL() string { 133 return "https://storage.googleapis.com/" + c.BinaryBucket + "/" + c.binaryObject() 134 } 135 136 func (c *Config) instName() string { return c.Name } // for now 137 func (c *Config) zone() string { return strDefault(c.Zone, "us-central1-f") } 138 func (c *Config) machineType() string { return strDefault(c.MachineType, "g1-small") } 139 func (c *Config) binaryObject() string { return strDefault(c.BinaryObject, c.Name) } 140 func (c *Config) updateStrategy() string { return strDefault(c.UpdateStrategy, "best-effort") } 141 142 func (c *Config) projectAPIURL() string { 143 return "https://www.googleapis.com/compute/v1/projects/" + c.GCEProjectID 144 } 145 func (c *Config) machineTypeURL() string { 146 return c.projectAPIURL() + "/zones/" + c.zone() + "/machineTypes/" + c.machineType() 147 } 148 149 func strDefault(a, b string) string { 150 if a != "" { 151 return a 152 } 153 return b 154 } 155 156 var ( 157 doLaunch = flag.Bool("cloudlaunch", false, "Deploy or update this binary to the cloud. 
Must be on Linux, for now.") 158 ) 159 160 func (c *Config) MaybeDeploy() { 161 flag.Parse() 162 if !*doLaunch { 163 go c.restartLoop() 164 return 165 } 166 defer os.Exit(1) // backup, in case we return without Fatal or os.Exit later 167 168 if runtime.GOOS != "linux" || runtime.GOARCH != "amd64" { 169 log.Fatal("Can only use --cloudlaunch on linux/amd64, for now.") 170 } 171 172 if c.GCEProjectID == "" { 173 log.Fatal("cloudconfig.GCEProjectID is empty") 174 } 175 filename := filepath.Join(os.Getenv("HOME"), "keys", c.GCEProjectID+".key.json") 176 log.Printf("Using OAuth config from JSON service file: %s", filename) 177 jwtConf, err := google.JWTConfigFromJSON([]byte(readFile(filename)), append([]string{ 178 storageapi.DevstorageFullControlScope, 179 compute.ComputeScope, 180 "https://www.googleapis.com/auth/cloud-platform", 181 }, c.Scopes...)...) 182 if err != nil { 183 log.Fatalf("ConfigFromJSON: %v", err) 184 } 185 186 cl := &cloudLaunch{ 187 Config: c, 188 oauthClient: jwtConf.Client(oauth2.NoContext), 189 } 190 cl.computeService, _ = compute.New(cl.oauthClient) 191 192 cl.uploadBinary() 193 cl.createInstance() 194 os.Exit(0) 195 } 196 197 func (c *Config) restartLoop() { 198 if !metadata.OnGCE() { 199 return 200 } 201 if c.RestartPolicy == RestartNever { 202 return 203 } 204 url := c.binaryURL() 205 var lastEtag string 206 for { 207 res, err := http.Head(url + "?" + fmt.Sprint(time.Now().Unix())) 208 if err != nil { 209 log.Printf("Warning: %v", err) 210 time.Sleep(15 * time.Second) 211 continue 212 } 213 etag := res.Header.Get("Etag") 214 if etag == "" { 215 log.Printf("Warning, no ETag in response: %v", res) 216 time.Sleep(15 * time.Second) 217 continue 218 } 219 if lastEtag != "" && etag != lastEtag { 220 log.Printf("Binary updated; restarting.") 221 // TODO: more graceful restart, letting systemd own the network connections. 222 // Then we can finish up requests here. 
223 os.Exit(0) 224 } 225 lastEtag = etag 226 time.Sleep(15 * time.Second) 227 } 228 } 229 230 // uploadBinary uploads the currently-running Linux binary. 231 // It crashes if it fails. 232 func (cl *cloudLaunch) uploadBinary() { 233 ctx := context.Background() 234 if cl.BinaryBucket == "" { 235 log.Fatal("cloudlaunch: Config.BinaryBucket is empty") 236 } 237 stoClient, err := storage.NewClient(ctx, option.WithHTTPClient(cl.oauthClient)) 238 if err != nil { 239 log.Fatal(err) 240 } 241 w := stoClient.Bucket(cl.BinaryBucket).Object(cl.binaryObject()).NewWriter(ctx) 242 if err != nil { 243 log.Fatal(err) 244 } 245 w.ACL = []storage.ACLRule{ 246 // If you don't give the owners access, the web UI seems to 247 // have a bug and doesn't have access to see that it's public, so 248 // won't render the "Shared Publicly" link. So we do that, even 249 // though it's dumb and unnecessary otherwise: 250 { 251 Entity: storage.ACLEntity("project-owners-" + cl.GCEProjectID), 252 Role: storage.RoleOwner, 253 }, 254 // Public, so our systemd unit can get it easily: 255 { 256 Entity: storage.AllUsers, 257 Role: storage.RoleReader, 258 }, 259 } 260 w.CacheControl = "no-cache" 261 selfPath := getSelfPath() 262 log.Printf("Uploading %q to %v", selfPath, cl.binaryURL()) 263 f, err := os.Open(selfPath) 264 if err != nil { 265 log.Fatal(err) 266 } 267 defer f.Close() 268 n, err := io.Copy(w, f) 269 if err != nil { 270 log.Fatal(err) 271 } 272 if err := w.Close(); err != nil { 273 log.Fatal(err) 274 } 275 log.Printf("Uploaded %d bytes", n) 276 } 277 278 func getSelfPath() string { 279 if runtime.GOOS != "linux" { 280 panic("TODO") 281 } 282 v, err := os.Readlink("/proc/self/exe") 283 if err != nil { 284 log.Fatal(err) 285 } 286 return v 287 } 288 289 func zoneInRegion(zone, regionURL string) bool { 290 if zone == "" { 291 panic("empty zone") 292 } 293 if regionURL == "" { 294 panic("empty regionURL") 295 } 296 // zone is like "us-central1-f" 297 // regionURL is like 
"https://www.googleapis.com/compute/v1/projects/camlistore-website/regions/us-central1" 298 region := path.Base(regionURL) // "us-central1" 299 if region == "" { 300 panic("empty region") 301 } 302 return strings.HasPrefix(zone, region) 303 } 304 305 // findIP finds an IP address to use, or returns the empty string if none is found. 306 // It tries to find a reserved one in the same region where the name of the reserved IP 307 // is "NAME-ip" and the IP is not in use. 308 func (cl *cloudLaunch) findIP() string { 309 // Try to find it by name. 310 aggAddrList, err := cl.computeService.Addresses.AggregatedList(cl.GCEProjectID).Do() 311 if err != nil { 312 log.Fatal(err) 313 } 314 // https://godoc.org/google.golang.org/api/compute/v1#AddressAggregatedList 315 var ip string 316 IPLoop: 317 for _, asl := range aggAddrList.Items { 318 for _, addr := range asl.Addresses { 319 log.Printf(" addr: %#v", addr) 320 if addr.Name == cl.Name+"-ip" && addr.Status == "RESERVED" && zoneInRegion(cl.zone(), addr.Region) { 321 ip = addr.Address 322 break IPLoop 323 } 324 } 325 } 326 return ip 327 } 328 329 func (cl *cloudLaunch) createInstance() { 330 inst := cl.lookupInstance() 331 if inst != nil { 332 log.Printf("Instance exists; not re-creating.") 333 return 334 } 335 336 log.Printf("Instance doesn't exist; creating...") 337 338 ip := cl.findIP() 339 log.Printf("Found IP: %v", ip) 340 341 cloudConfig := strings.NewReplacer( 342 "$NAME", cl.Name, 343 "$URL", cl.binaryURL(), 344 "$REBOOT", cl.updateStrategy(), 345 ).Replace(baseConfig) 346 347 instance := &compute.Instance{ 348 Name: cl.instName(), 349 Description: cl.Name, 350 MachineType: cl.machineTypeURL(), 351 Disks: []*compute.AttachedDisk{cl.instanceDisk()}, 352 Tags: &compute.Tags{ 353 Items: []string{"http-server", "https-server"}, 354 }, 355 Metadata: &compute.Metadata{ 356 Items: []*compute.MetadataItems{ 357 { 358 Key: "user-data", 359 Value: googleapi.String(cloudConfig), 360 }, 361 }, 362 }, 363 NetworkInterfaces: 
[]*compute.NetworkInterface{ 364 &compute.NetworkInterface{ 365 AccessConfigs: []*compute.AccessConfig{ 366 &compute.AccessConfig{ 367 Type: "ONE_TO_ONE_NAT", 368 Name: "External NAT", 369 NatIP: ip, 370 }, 371 }, 372 Network: cl.projectAPIURL() + "/global/networks/default", 373 }, 374 }, 375 ServiceAccounts: []*compute.ServiceAccount{ 376 { 377 Email: "default", 378 Scopes: cl.Scopes, 379 }, 380 }, 381 } 382 383 log.Printf("Creating instance...") 384 op, err := cl.computeService.Instances.Insert(cl.GCEProjectID, cl.zone(), instance).Do() 385 if err != nil { 386 log.Fatalf("Failed to create instance: %v", err) 387 } 388 opName := op.Name 389 log.Printf("Created. Waiting on operation %v", opName) 390 OpLoop: 391 for { 392 time.Sleep(2 * time.Second) 393 op, err := cl.computeService.ZoneOperations.Get(cl.GCEProjectID, cl.zone(), opName).Do() 394 if err != nil { 395 log.Fatalf("Failed to get op %s: %v", opName, err) 396 } 397 switch op.Status { 398 case "PENDING", "RUNNING": 399 log.Printf("Waiting on operation %v", opName) 400 continue 401 case "DONE": 402 if op.Error != nil { 403 for _, operr := range op.Error.Errors { 404 log.Printf("Error: %+v", operr) 405 } 406 log.Fatalf("Failed to start.") 407 } 408 log.Printf("Success. %+v", op) 409 break OpLoop 410 default: 411 log.Fatalf("Unknown status %q: %+v", op.Status, op) 412 } 413 } 414 415 inst, err = cl.computeService.Instances.Get(cl.GCEProjectID, cl.zone(), cl.instName()).Do() 416 if err != nil { 417 log.Fatalf("Error getting instance after creation: %v", err) 418 } 419 ij, _ := json.MarshalIndent(inst, "", " ") 420 log.Printf("%s", ij) 421 log.Printf("Instance created.") 422 os.Exit(0) 423 } 424 425 // returns nil if instance doesn't exist. 
426 func (cl *cloudLaunch) lookupInstance() *compute.Instance { 427 inst, err := cl.computeService.Instances.Get(cl.GCEProjectID, cl.zone(), cl.instName()).Do() 428 if ae, ok := err.(*googleapi.Error); ok && ae.Code == 404 { 429 return nil 430 } else if err != nil { 431 log.Fatalf("Instances.Get: %v", err) 432 } 433 return inst 434 } 435 436 func (cl *cloudLaunch) instanceDisk() *compute.AttachedDisk { 437 imageURL, err := gceutil.CoreOSImageURL(cl.oauthClient) 438 if err != nil { 439 log.Fatalf("error looking up latest CoreOS stable image: %v", err) 440 } 441 diskName := cl.instName() + "-coreos-stateless-pd" 442 var diskType string 443 if cl.SSD { 444 diskType = cl.projectAPIURL() + "/zones/" + cl.zone() + "/diskTypes/pd-ssd" 445 } 446 return &compute.AttachedDisk{ 447 AutoDelete: true, 448 Boot: true, 449 Type: "PERSISTENT", 450 InitializeParams: &compute.AttachedDiskInitializeParams{ 451 DiskName: diskName, 452 SourceImage: imageURL, 453 DiskSizeGb: 50, 454 DiskType: diskType, 455 }, 456 } 457 }