go4.org@v0.0.0-20230225012048-214862532bf5/cloud/cloudlaunch/cloudlaunch.go (about)

     1  /*
     2  Copyright 2015 The Perkeep Authors
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8       http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  // Package cloudlaunch helps binaries run themselves on The Cloud, copying
    18  // themselves to GCE.
    19  package cloudlaunch // import "go4.org/cloud/cloudlaunch"
    20  
    21  import (
    22  	"encoding/json"
    23  	"flag"
    24  	"fmt"
    25  	"io"
    26  	"io/ioutil"
    27  	"log"
    28  	"net/http"
    29  	"os"
    30  	"path"
    31  	"path/filepath"
    32  	"runtime"
    33  	"strings"
    34  	"time"
    35  
    36  	"go4.org/cloud/google/gceutil"
    37  
    38  	"cloud.google.com/go/compute/metadata"
    39  	"cloud.google.com/go/storage"
    40  	"golang.org/x/net/context"
    41  	"golang.org/x/oauth2"
    42  	"golang.org/x/oauth2/google"
    43  	compute "google.golang.org/api/compute/v1"
    44  	"google.golang.org/api/googleapi"
    45  	"google.golang.org/api/option"
    46  	storageapi "google.golang.org/api/storage/v1"
    47  )
    48  
    49  func readFile(v string) string {
    50  	slurp, err := ioutil.ReadFile(v)
    51  	if err != nil {
    52  		log.Fatalf("Error reading %s: %v", v, err)
    53  	}
    54  	return strings.TrimSpace(string(slurp))
    55  }
    56  
// baseConfig is the cloud-config template passed to the instance as its
// "user-data" metadata value. createInstance substitutes the $NAME, $URL and
// $REBOOT placeholders before use.
//
// The systemd unit it defines downloads $URL (with the current Unix time
// appended as a query string) to /opt/bin/$NAME in ExecStartPre, runs it,
// and is configured with Restart=always and RestartSec=10.
const baseConfig = `#cloud-config
coreos:
  update:
    group: stable
    reboot-strategy: $REBOOT
  units:
    - name: $NAME.service
      command: start
      content: |
        [Unit]
        Description=$NAME service
        After=network.target
        
        [Service]
        Type=simple
        ExecStartPre=/bin/sh -c 'mkdir -p /opt/bin && /usr/bin/curl --silent -f -o /opt/bin/$NAME $URL?$(date +%s) && chmod +x /opt/bin/$NAME'
        ExecStart=/opt/bin/$NAME
        RestartSec=10
        Restart=always
        StartLimitInterval=0
        
        [Install]
        WantedBy=network-online.target
`
    81  
// RestartPolicy controls whether the binary automatically restarts.
type RestartPolicy int

const (
	// RestartOnUpdates makes the running binary poll its download URL and
	// exit when the object's ETag changes. It is the zero value.
	RestartOnUpdates RestartPolicy = iota
	// RestartNever disables the update-polling loop: the binary never exits
	// on its own because of an update.
	RestartNever
	// TODO: more graceful restarts; make systemd own listening on network sockets,
	// don't break connections.
)
    91  
// Config describes a service to deploy to GCE: which binary to run, where
// it is stored in GCS, and how the instance running it is configured.
type Config struct {
	// Name is the name of a service to run.
	// This is the name of the systemd service (without .service)
	// and the name of the GCE instance.
	Name string

	// RestartPolicy controls whether the binary automatically restarts
	// on updates. The zero value means automatic.
	RestartPolicy RestartPolicy

	// UpdateStrategy sets the CoreOS automatic update strategy, and the
	// associated reboots. Possible values are "best-effort", "etcd-lock",
	// "reboot", "off", with "best-effort" being the default. See
	// https://coreos.com/os/docs/latest/update-strategies.html
	UpdateStrategy string

	// BinaryBucket and BinaryObject are the GCS bucket and object
	// within that bucket containing the Linux binary to download
	// on boot and occasionally run. This binary must be public
	// (at least for now).
	BinaryBucket string
	BinaryObject string // defaults to Name

	// GCEProjectID is the Google Cloud project ID. MaybeDeploy requires it
	// to be non-empty when deploying, and also uses it to locate the
	// credentials file: $HOME/keys/ + GCEProjectID + ".key.json".
	GCEProjectID string
	Zone         string // defaults to us-central1-f
	SSD          bool   // if true, the boot disk uses the pd-ssd disk type

	// Scopes lists additional OAuth2 scopes. They are appended to the
	// scopes requested for the deploying client, and they are the scopes
	// given to the instance's default service account.
	Scopes []string // any additional scopes

	MachineType  string // GCE machine type; defaults to g1-small
	InstanceName string // NOTE(review): not read anywhere in this file; instName() returns Name — confirm intent
}
   124  
// cloudLaunch is a launch of a Config.
// It bundles the Config with the authenticated clients used to deploy it.
type cloudLaunch struct {
	*Config
	oauthClient    *http.Client     // authenticated HTTP client built from the JWT config in MaybeDeploy
	computeService *compute.Service // Compute Engine API client built on oauthClient
}
   131  
   132  func (c *Config) binaryURL() string {
   133  	return "https://storage.googleapis.com/" + c.BinaryBucket + "/" + c.binaryObject()
   134  }
   135  
   136  func (c *Config) instName() string       { return c.Name } // for now
   137  func (c *Config) zone() string           { return strDefault(c.Zone, "us-central1-f") }
   138  func (c *Config) machineType() string    { return strDefault(c.MachineType, "g1-small") }
   139  func (c *Config) binaryObject() string   { return strDefault(c.BinaryObject, c.Name) }
   140  func (c *Config) updateStrategy() string { return strDefault(c.UpdateStrategy, "best-effort") }
   141  
   142  func (c *Config) projectAPIURL() string {
   143  	return "https://www.googleapis.com/compute/v1/projects/" + c.GCEProjectID
   144  }
   145  func (c *Config) machineTypeURL() string {
   146  	return c.projectAPIURL() + "/zones/" + c.zone() + "/machineTypes/" + c.machineType()
   147  }
   148  
   149  func strDefault(a, b string) string {
   150  	if a != "" {
   151  		return a
   152  	}
   153  	return b
   154  }
   155  
var (
	// doLaunch is the --cloudlaunch command-line flag (default false).
	// MaybeDeploy reads it after flag.Parse: when set it deploys and exits,
	// otherwise it starts the restart loop and returns.
	doLaunch = flag.Bool("cloudlaunch", false, "Deploy or update this binary to the cloud. Must be on Linux, for now.")
)
   159  
   160  func (c *Config) MaybeDeploy() {
   161  	flag.Parse()
   162  	if !*doLaunch {
   163  		go c.restartLoop()
   164  		return
   165  	}
   166  	defer os.Exit(1) // backup, in case we return without Fatal or os.Exit later
   167  
   168  	if runtime.GOOS != "linux" || runtime.GOARCH != "amd64" {
   169  		log.Fatal("Can only use --cloudlaunch on linux/amd64, for now.")
   170  	}
   171  
   172  	if c.GCEProjectID == "" {
   173  		log.Fatal("cloudconfig.GCEProjectID is empty")
   174  	}
   175  	filename := filepath.Join(os.Getenv("HOME"), "keys", c.GCEProjectID+".key.json")
   176  	log.Printf("Using OAuth config from JSON service file: %s", filename)
   177  	jwtConf, err := google.JWTConfigFromJSON([]byte(readFile(filename)), append([]string{
   178  		storageapi.DevstorageFullControlScope,
   179  		compute.ComputeScope,
   180  		"https://www.googleapis.com/auth/cloud-platform",
   181  	}, c.Scopes...)...)
   182  	if err != nil {
   183  		log.Fatalf("ConfigFromJSON: %v", err)
   184  	}
   185  
   186  	cl := &cloudLaunch{
   187  		Config:      c,
   188  		oauthClient: jwtConf.Client(oauth2.NoContext),
   189  	}
   190  	cl.computeService, _ = compute.New(cl.oauthClient)
   191  
   192  	cl.uploadBinary()
   193  	cl.createInstance()
   194  	os.Exit(0)
   195  }
   196  
   197  func (c *Config) restartLoop() {
   198  	if !metadata.OnGCE() {
   199  		return
   200  	}
   201  	if c.RestartPolicy == RestartNever {
   202  		return
   203  	}
   204  	url := c.binaryURL()
   205  	var lastEtag string
   206  	for {
   207  		res, err := http.Head(url + "?" + fmt.Sprint(time.Now().Unix()))
   208  		if err != nil {
   209  			log.Printf("Warning: %v", err)
   210  			time.Sleep(15 * time.Second)
   211  			continue
   212  		}
   213  		etag := res.Header.Get("Etag")
   214  		if etag == "" {
   215  			log.Printf("Warning, no ETag in response: %v", res)
   216  			time.Sleep(15 * time.Second)
   217  			continue
   218  		}
   219  		if lastEtag != "" && etag != lastEtag {
   220  			log.Printf("Binary updated; restarting.")
   221  			// TODO: more graceful restart, letting systemd own the network connections.
   222  			// Then we can finish up requests here.
   223  			os.Exit(0)
   224  		}
   225  		lastEtag = etag
   226  		time.Sleep(15 * time.Second)
   227  	}
   228  }
   229  
   230  // uploadBinary uploads the currently-running Linux binary.
   231  // It crashes if it fails.
   232  func (cl *cloudLaunch) uploadBinary() {
   233  	ctx := context.Background()
   234  	if cl.BinaryBucket == "" {
   235  		log.Fatal("cloudlaunch: Config.BinaryBucket is empty")
   236  	}
   237  	stoClient, err := storage.NewClient(ctx, option.WithHTTPClient(cl.oauthClient))
   238  	if err != nil {
   239  		log.Fatal(err)
   240  	}
   241  	w := stoClient.Bucket(cl.BinaryBucket).Object(cl.binaryObject()).NewWriter(ctx)
   242  	if err != nil {
   243  		log.Fatal(err)
   244  	}
   245  	w.ACL = []storage.ACLRule{
   246  		// If you don't give the owners access, the web UI seems to
   247  		// have a bug and doesn't have access to see that it's public, so
   248  		// won't render the "Shared Publicly" link. So we do that, even
   249  		// though it's dumb and unnecessary otherwise:
   250  		{
   251  			Entity: storage.ACLEntity("project-owners-" + cl.GCEProjectID),
   252  			Role:   storage.RoleOwner,
   253  		},
   254  		// Public, so our systemd unit can get it easily:
   255  		{
   256  			Entity: storage.AllUsers,
   257  			Role:   storage.RoleReader,
   258  		},
   259  	}
   260  	w.CacheControl = "no-cache"
   261  	selfPath := getSelfPath()
   262  	log.Printf("Uploading %q to %v", selfPath, cl.binaryURL())
   263  	f, err := os.Open(selfPath)
   264  	if err != nil {
   265  		log.Fatal(err)
   266  	}
   267  	defer f.Close()
   268  	n, err := io.Copy(w, f)
   269  	if err != nil {
   270  		log.Fatal(err)
   271  	}
   272  	if err := w.Close(); err != nil {
   273  		log.Fatal(err)
   274  	}
   275  	log.Printf("Uploaded %d bytes", n)
   276  }
   277  
   278  func getSelfPath() string {
   279  	if runtime.GOOS != "linux" {
   280  		panic("TODO")
   281  	}
   282  	v, err := os.Readlink("/proc/self/exe")
   283  	if err != nil {
   284  		log.Fatal(err)
   285  	}
   286  	return v
   287  }
   288  
   289  func zoneInRegion(zone, regionURL string) bool {
   290  	if zone == "" {
   291  		panic("empty zone")
   292  	}
   293  	if regionURL == "" {
   294  		panic("empty regionURL")
   295  	}
   296  	// zone is like "us-central1-f"
   297  	// regionURL is like "https://www.googleapis.com/compute/v1/projects/camlistore-website/regions/us-central1"
   298  	region := path.Base(regionURL) // "us-central1"
   299  	if region == "" {
   300  		panic("empty region")
   301  	}
   302  	return strings.HasPrefix(zone, region)
   303  }
   304  
   305  // findIP finds an IP address to use, or returns the empty string if none is found.
   306  // It tries to find a reserved one in the same region where the name of the reserved IP
   307  // is "NAME-ip" and the IP is not in use.
   308  func (cl *cloudLaunch) findIP() string {
   309  	// Try to find it by name.
   310  	aggAddrList, err := cl.computeService.Addresses.AggregatedList(cl.GCEProjectID).Do()
   311  	if err != nil {
   312  		log.Fatal(err)
   313  	}
   314  	// https://godoc.org/google.golang.org/api/compute/v1#AddressAggregatedList
   315  	var ip string
   316  IPLoop:
   317  	for _, asl := range aggAddrList.Items {
   318  		for _, addr := range asl.Addresses {
   319  			log.Printf("  addr: %#v", addr)
   320  			if addr.Name == cl.Name+"-ip" && addr.Status == "RESERVED" && zoneInRegion(cl.zone(), addr.Region) {
   321  				ip = addr.Address
   322  				break IPLoop
   323  			}
   324  		}
   325  	}
   326  	return ip
   327  }
   328  
   329  func (cl *cloudLaunch) createInstance() {
   330  	inst := cl.lookupInstance()
   331  	if inst != nil {
   332  		log.Printf("Instance exists; not re-creating.")
   333  		return
   334  	}
   335  
   336  	log.Printf("Instance doesn't exist; creating...")
   337  
   338  	ip := cl.findIP()
   339  	log.Printf("Found IP: %v", ip)
   340  
   341  	cloudConfig := strings.NewReplacer(
   342  		"$NAME", cl.Name,
   343  		"$URL", cl.binaryURL(),
   344  		"$REBOOT", cl.updateStrategy(),
   345  	).Replace(baseConfig)
   346  
   347  	instance := &compute.Instance{
   348  		Name:        cl.instName(),
   349  		Description: cl.Name,
   350  		MachineType: cl.machineTypeURL(),
   351  		Disks:       []*compute.AttachedDisk{cl.instanceDisk()},
   352  		Tags: &compute.Tags{
   353  			Items: []string{"http-server", "https-server"},
   354  		},
   355  		Metadata: &compute.Metadata{
   356  			Items: []*compute.MetadataItems{
   357  				{
   358  					Key:   "user-data",
   359  					Value: googleapi.String(cloudConfig),
   360  				},
   361  			},
   362  		},
   363  		NetworkInterfaces: []*compute.NetworkInterface{
   364  			&compute.NetworkInterface{
   365  				AccessConfigs: []*compute.AccessConfig{
   366  					&compute.AccessConfig{
   367  						Type:  "ONE_TO_ONE_NAT",
   368  						Name:  "External NAT",
   369  						NatIP: ip,
   370  					},
   371  				},
   372  				Network: cl.projectAPIURL() + "/global/networks/default",
   373  			},
   374  		},
   375  		ServiceAccounts: []*compute.ServiceAccount{
   376  			{
   377  				Email:  "default",
   378  				Scopes: cl.Scopes,
   379  			},
   380  		},
   381  	}
   382  
   383  	log.Printf("Creating instance...")
   384  	op, err := cl.computeService.Instances.Insert(cl.GCEProjectID, cl.zone(), instance).Do()
   385  	if err != nil {
   386  		log.Fatalf("Failed to create instance: %v", err)
   387  	}
   388  	opName := op.Name
   389  	log.Printf("Created. Waiting on operation %v", opName)
   390  OpLoop:
   391  	for {
   392  		time.Sleep(2 * time.Second)
   393  		op, err := cl.computeService.ZoneOperations.Get(cl.GCEProjectID, cl.zone(), opName).Do()
   394  		if err != nil {
   395  			log.Fatalf("Failed to get op %s: %v", opName, err)
   396  		}
   397  		switch op.Status {
   398  		case "PENDING", "RUNNING":
   399  			log.Printf("Waiting on operation %v", opName)
   400  			continue
   401  		case "DONE":
   402  			if op.Error != nil {
   403  				for _, operr := range op.Error.Errors {
   404  					log.Printf("Error: %+v", operr)
   405  				}
   406  				log.Fatalf("Failed to start.")
   407  			}
   408  			log.Printf("Success. %+v", op)
   409  			break OpLoop
   410  		default:
   411  			log.Fatalf("Unknown status %q: %+v", op.Status, op)
   412  		}
   413  	}
   414  
   415  	inst, err = cl.computeService.Instances.Get(cl.GCEProjectID, cl.zone(), cl.instName()).Do()
   416  	if err != nil {
   417  		log.Fatalf("Error getting instance after creation: %v", err)
   418  	}
   419  	ij, _ := json.MarshalIndent(inst, "", "    ")
   420  	log.Printf("%s", ij)
   421  	log.Printf("Instance created.")
   422  	os.Exit(0)
   423  }
   424  
   425  // returns nil if instance doesn't exist.
   426  func (cl *cloudLaunch) lookupInstance() *compute.Instance {
   427  	inst, err := cl.computeService.Instances.Get(cl.GCEProjectID, cl.zone(), cl.instName()).Do()
   428  	if ae, ok := err.(*googleapi.Error); ok && ae.Code == 404 {
   429  		return nil
   430  	} else if err != nil {
   431  		log.Fatalf("Instances.Get: %v", err)
   432  	}
   433  	return inst
   434  }
   435  
   436  func (cl *cloudLaunch) instanceDisk() *compute.AttachedDisk {
   437  	imageURL, err := gceutil.CoreOSImageURL(cl.oauthClient)
   438  	if err != nil {
   439  		log.Fatalf("error looking up latest CoreOS stable image: %v", err)
   440  	}
   441  	diskName := cl.instName() + "-coreos-stateless-pd"
   442  	var diskType string
   443  	if cl.SSD {
   444  		diskType = cl.projectAPIURL() + "/zones/" + cl.zone() + "/diskTypes/pd-ssd"
   445  	}
   446  	return &compute.AttachedDisk{
   447  		AutoDelete: true,
   448  		Boot:       true,
   449  		Type:       "PERSISTENT",
   450  		InitializeParams: &compute.AttachedDiskInitializeParams{
   451  			DiskName:    diskName,
   452  			SourceImage: imageURL,
   453  			DiskSizeGb:  50,
   454  			DiskType:    diskType,
   455  		},
   456  	}
   457  }