github.com/coreos/mantle@v0.13.0/platform/api/packet/api.go (about)

     1  // Copyright 2017 CoreOS, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package packet
    16  
    17  import (
    18  	"bytes"
    19  	"crypto/rand"
    20  	"encoding/base64"
    21  	"encoding/json"
    22  	"fmt"
    23  	"io"
    24  	"net"
    25  	"strings"
    26  	"time"
    27  
    28  	ignition "github.com/coreos/ignition/config/v2_0/types"
    29  	"github.com/coreos/pkg/capnslog"
    30  	"github.com/packethost/packngo"
    31  	"golang.org/x/crypto/ssh"
    32  	"golang.org/x/net/context"
    33  	gs "google.golang.org/api/storage/v1"
    34  
    35  	"github.com/coreos/mantle/auth"
    36  	"github.com/coreos/mantle/platform"
    37  	"github.com/coreos/mantle/platform/api/gcloud"
    38  	"github.com/coreos/mantle/platform/conf"
    39  	"github.com/coreos/mantle/storage"
    40  	"github.com/coreos/mantle/util"
    41  )
    42  
const (
	// Provisioning a VM is supposed to take < 8 minutes, but in practice can take longer.
	// launchTimeout and launchPollInterval bound the polling done by
	// waitForActive while a new device reaches the "active" state.
	launchTimeout       = 10 * time.Minute
	launchPollInterval  = 30 * time.Second
	// installTimeout and installPollInterval are used by waitForInstall:
	// the former is the overall deadline, the latter the per-attempt dial
	// timeout and retry spacing.
	installTimeout      = 15 * time.Minute
	installPollInterval = 5 * time.Second
	// apiRetries is the number of additional attempts createDevice makes
	// after an HTTP 500, sleeping apiRetryInterval between attempts.
	apiRetries          = 3
	apiRetryInterval    = 5 * time.Second
)
    52  
var (
	// plog is the package logger.
	plog = capnslog.NewPackageLogger("github.com/coreos/mantle", "platform/api/packet")

	// The maps below are keyed by Container Linux board name. New uses them
	// to fill in options the caller left empty.

	// defaultInstallerImageBaseURL is the base URL from which the iPXE
	// script fetches the PXE kernel and initrd.
	defaultInstallerImageBaseURL = map[string]string{
		// HTTPS causes iPXE to fail on a "permission denied" error
		"amd64-usr": "http://stable.release.core-os.net/amd64-usr/current",
		"arm64-usr": "http://beta.release.core-os.net/arm64-usr/current",
	}
	// defaultImageURL is the image downloaded and written to disk by the
	// install unit generated in wrapUserData.
	defaultImageURL = map[string]string{
		"amd64-usr": "https://alpha.release.core-os.net/amd64-usr/current/coreos_production_packet_image.bin.bz2",
		"arm64-usr": "https://alpha.release.core-os.net/arm64-usr/current/coreos_production_packet_image.bin.bz2",
	}
	// defaultPlan is the device plan slug requested when none is given.
	defaultPlan = map[string]string{
		"amd64-usr": "baremetal_0",
		"arm64-usr": "baremetal_2a",
	}
	// linuxConsole is the value of the kernel "console=" argument written
	// into the iPXE script. New also uses membership in this map to decide
	// whether a board name is known.
	linuxConsole = map[string]string{
		"amd64-usr": "ttyS1,115200",
		"arm64-usr": "ttyAMA0,115200",
	}
)
    74  
// Options configures an API instance. New fills in empty credentials from
// the config profile and empty Plan, InstallerImageBaseURL and ImageURL
// values from per-board defaults, modifying the struct in place.
type Options struct {
	*platform.Options

	// Config file. Defaults to $HOME/.config/packet.json.
	ConfigPath string
	// Profile name
	Profile string
	// API key (overrides config profile)
	ApiKey string
	// Project UUID (overrides config profile)
	Project string

	// Packet location code
	Facility string
	// Slug of the device type (e.g. "baremetal_0")
	Plan string
	// The Container Linux board name
	Board string
	// e.g. http://alpha.release.core-os.net/amd64-usr/current
	InstallerImageBaseURL string
	// e.g. https://alpha.release.core-os.net/amd64-usr/current/coreos_production_packet_image.bin.bz2
	ImageURL string

	// Options for Google Storage
	GSOptions *gcloud.Options
	// Google Storage base URL for temporary uploads
	// e.g. gs://users.developer.core-os.net/bovik/mantle
	StorageURL string
}
   104  
// API bundles the Packet client, the Google Storage bucket used for
// temporary uploads, and the options both were created from. Construct it
// with New.
type API struct {
	c      *packngo.Client // Packet API client
	bucket *storage.Bucket // holds the uploaded Ignition config and iPXE script
	opts   *Options        // effective options, after defaults were applied by New
}
   110  
// Console is the sink for a device's console output. startConsole obtains
// an SSH connection through SSHClient, attaches the Console as the
// session's stdout, and closes it when the session ends.
type Console interface {
	io.WriteCloser
	// SSHClient returns an SSH client connected to ip as user.
	SSHClient(ip, user string) (*ssh.Client, error)
}
   115  
   116  func New(opts *Options) (*API, error) {
   117  	if opts.ApiKey == "" || opts.Project == "" {
   118  		profiles, err := auth.ReadPacketConfig(opts.ConfigPath)
   119  		if err != nil {
   120  			return nil, fmt.Errorf("couldn't read Packet config: %v", err)
   121  		}
   122  
   123  		if opts.Profile == "" {
   124  			opts.Profile = "default"
   125  		}
   126  		profile, ok := profiles[opts.Profile]
   127  		if !ok {
   128  			return nil, fmt.Errorf("no such profile %q", opts.Profile)
   129  		}
   130  		if opts.ApiKey == "" {
   131  			opts.ApiKey = profile.ApiKey
   132  		}
   133  		if opts.Project == "" {
   134  			opts.Project = profile.Project
   135  		}
   136  	}
   137  
   138  	_, ok := linuxConsole[opts.Board]
   139  	if !ok {
   140  		return nil, fmt.Errorf("unknown board %q", opts.Board)
   141  	}
   142  	if opts.Plan == "" {
   143  		opts.Plan = defaultPlan[opts.Board]
   144  	}
   145  	if opts.InstallerImageBaseURL == "" {
   146  		opts.InstallerImageBaseURL = defaultInstallerImageBaseURL[opts.Board]
   147  	}
   148  	if opts.ImageURL == "" {
   149  		opts.ImageURL = defaultImageURL[opts.Board]
   150  	}
   151  
   152  	gapi, err := gcloud.New(opts.GSOptions)
   153  	if err != nil {
   154  		return nil, fmt.Errorf("connecting to Google Storage: %v", err)
   155  	}
   156  	bucket, err := storage.NewBucket(gapi.Client(), opts.StorageURL)
   157  	if err != nil {
   158  		return nil, fmt.Errorf("connecting to Google Storage bucket: %v", err)
   159  	}
   160  
   161  	client := packngo.NewClient("github.com/coreos/mantle", opts.ApiKey, nil)
   162  
   163  	return &API{
   164  		c:      client,
   165  		bucket: bucket,
   166  		opts:   opts,
   167  	}, nil
   168  }
   169  
   170  func (a *API) PreflightCheck() error {
   171  	_, _, err := a.c.Projects.Get(a.opts.Project)
   172  	if err != nil {
   173  		return fmt.Errorf("querying project %v: %v", a.opts.Project, err)
   174  	}
   175  	return nil
   176  }
   177  
   178  // console is optional, and is closed on error or when the device is deleted.
   179  func (a *API) CreateDevice(hostname string, conf *conf.Conf, console Console) (*packngo.Device, error) {
   180  	consoleStarted := false
   181  	defer func() {
   182  		if console != nil && !consoleStarted {
   183  			console.Close()
   184  		}
   185  	}()
   186  
   187  	userdata, err := a.wrapUserData(conf)
   188  	if err != nil {
   189  		return nil, err
   190  	}
   191  
   192  	// The Ignition config can't go in userdata via coreos.config.url=https://metadata.packet.net/userdata because Ignition supplies an Accept header that metadata.packet.net finds 406 Not Acceptable.
   193  	// It can't go in userdata via coreos.oem.id=packet because the Packet OEM expects unit files in /usr/share/oem which the PXE image doesn't have.
   194  	userdataName, userdataURL, err := a.uploadObject(hostname, "application/vnd.coreos.ignition+json", []byte(userdata))
   195  	if err != nil {
   196  		return nil, err
   197  	}
   198  	defer a.bucket.Delete(context.TODO(), userdataName)
   199  
   200  	// This can't go in userdata because the installed coreos-cloudinit will try to execute it.
   201  	ipxeScriptName, ipxeScriptURL, err := a.uploadObject(hostname, "application/octet-stream", []byte(a.ipxeScript(userdataURL)))
   202  	if err != nil {
   203  		return nil, err
   204  	}
   205  	defer a.bucket.Delete(context.TODO(), ipxeScriptName)
   206  
   207  	device, err := a.createDevice(hostname, ipxeScriptURL)
   208  	if err != nil {
   209  		return nil, fmt.Errorf("couldn't create device: %v", err)
   210  	}
   211  	deviceID := device.ID
   212  
   213  	if console != nil {
   214  		err := a.startConsole(deviceID, console)
   215  		consoleStarted = true
   216  		if err != nil {
   217  			a.DeleteDevice(deviceID)
   218  			return nil, err
   219  		}
   220  	}
   221  
   222  	device, err = a.waitForActive(deviceID)
   223  	if err != nil {
   224  		a.DeleteDevice(deviceID)
   225  		return nil, err
   226  	}
   227  
   228  	ipAddress := a.GetDeviceAddress(device, 4, true)
   229  	if ipAddress == "" {
   230  		a.DeleteDevice(deviceID)
   231  		return nil, fmt.Errorf("no public IP address found for %v", deviceID)
   232  	}
   233  
   234  	err = waitForInstall(ipAddress)
   235  	if err != nil {
   236  		a.DeleteDevice(deviceID)
   237  		return nil, fmt.Errorf("timed out waiting for coreos-install: %v", err)
   238  	}
   239  
   240  	return device, nil
   241  }
   242  
   243  func (a *API) DeleteDevice(deviceID string) error {
   244  	_, err := a.c.Devices.Delete(deviceID)
   245  	if err != nil {
   246  		return fmt.Errorf("deleting device %q: %v", deviceID, err)
   247  	}
   248  	return nil
   249  }
   250  
   251  func (a *API) GetDeviceAddress(device *packngo.Device, family int, public bool) string {
   252  	for _, address := range device.Network {
   253  		if address.AddressFamily == family && address.Public == public {
   254  			return address.Address
   255  		}
   256  	}
   257  	return ""
   258  }
   259  
   260  func (a *API) AddKey(name, key string) (string, error) {
   261  	sshKey, _, err := a.c.SSHKeys.Create(&packngo.SSHKeyCreateRequest{
   262  		Label: name,
   263  		Key:   key,
   264  	})
   265  	if err != nil {
   266  		return "", fmt.Errorf("couldn't create SSH key: %v", err)
   267  	}
   268  	return sshKey.ID, nil
   269  }
   270  
   271  func (a *API) DeleteKey(keyID string) error {
   272  	_, err := a.c.SSHKeys.Delete(keyID)
   273  	if err != nil {
   274  		return fmt.Errorf("couldn't delete SSH key: %v", err)
   275  	}
   276  	return nil
   277  }
   278  
   279  func (a *API) ListKeys() ([]packngo.SSHKey, error) {
   280  	keys, _, err := a.c.SSHKeys.List()
   281  	if err != nil {
   282  		return nil, fmt.Errorf("couldn't list SSH keys: %v", err)
   283  	}
   284  	return keys, nil
   285  }
   286  
   287  func (a *API) wrapUserData(conf *conf.Conf) (string, error) {
   288  	userDataOption := "-i"
   289  	if !conf.IsIgnition() && conf.String() != "" {
   290  		// By providing a no-op Ignition config, we prevent Ignition
   291  		// from enabling oem-cloudinit.service, which is unordered
   292  		// with respect to the cloud-config installed by the -c
   293  		// option. Otherwise it might override settings in the
   294  		// cloud-config with defaults obtained from the Packet
   295  		// metadata endpoint.
   296  		userDataOption = "-i /noop.ign -c"
   297  	}
   298  	escapedImageURL := strings.Replace(a.opts.ImageURL, "%", "%%", -1)
   299  
   300  	// make systemd units
   301  	discardSocketUnit := `
   302  [Unit]
   303  Description=Discard Socket
   304  
   305  [Socket]
   306  ListenStream=0.0.0.0:9
   307  Accept=true
   308  
   309  [Install]
   310  WantedBy=multi-user.target
   311  `
   312  	discardServiceUnit := `
   313  [Unit]
   314  Description=Discard Service
   315  Requires=discard.socket
   316  
   317  [Service]
   318  ExecStart=/usr/bin/cat
   319  StandardInput=socket
   320  StandardOutput=null
   321  `
   322  	installUnit := fmt.Sprintf(`
   323  [Unit]
   324  Description=Install Container Linux
   325  
   326  Requires=network-online.target
   327  After=network-online.target
   328  
   329  Requires=dev-sda.device
   330  After=dev-sda.device
   331  
   332  [Service]
   333  Type=oneshot
   334  # Prevent coreos-install from validating cloud-config
   335  Environment=PATH=/root/bin:/usr/sbin:/usr/bin
   336  
   337  ExecStart=/usr/bin/curl -fo image.bin.bz2 "%v"
   338  # We don't verify signatures because the iPXE script isn't verified either
   339  # (and, in fact, is transferred over HTTP)
   340  
   341  ExecStart=/usr/bin/coreos-install -d /dev/sda -f image.bin.bz2 %v /userdata
   342  
   343  ExecStart=/usr/bin/systemctl --no-block isolate reboot.target
   344  
   345  StandardOutput=journal+console
   346  StandardError=journal+console
   347  
   348  [Install]
   349  RequiredBy=multi-user.target
   350  `, escapedImageURL, userDataOption)
   351  
   352  	// make workarounds
   353  	noopIgnitionConfig := base64.StdEncoding.EncodeToString([]byte(`{"ignition": {"version": "2.1.0"}}`))
   354  	coreosCloudInit := base64.StdEncoding.EncodeToString([]byte("#!/bin/sh\nexit 0"))
   355  
   356  	// make Ignition config
   357  	b64UserData := base64.StdEncoding.EncodeToString(conf.Bytes())
   358  	var buf bytes.Buffer
   359  	err := json.NewEncoder(&buf).Encode(ignition.Config{
   360  		Ignition: ignition.Ignition{
   361  			Version: ignition.IgnitionVersion{Major: 2},
   362  		},
   363  		Storage: ignition.Storage{
   364  			Files: []ignition.File{
   365  				ignition.File{
   366  					Filesystem: "root",
   367  					Path:       "/userdata",
   368  					Contents: ignition.FileContents{
   369  						Source: ignition.Url{
   370  							Scheme: "data",
   371  							Opaque: ";base64," + b64UserData,
   372  						},
   373  					},
   374  					Mode: 0644,
   375  				},
   376  				ignition.File{
   377  					Filesystem: "root",
   378  					Path:       "/noop.ign",
   379  					Contents: ignition.FileContents{
   380  						Source: ignition.Url{
   381  							Scheme: "data",
   382  							Opaque: ";base64," + noopIgnitionConfig,
   383  						},
   384  					},
   385  					Mode: 0644,
   386  				},
   387  				ignition.File{
   388  					Filesystem: "root",
   389  					Path:       "/root/bin/coreos-cloudinit",
   390  					Contents: ignition.FileContents{
   391  						Source: ignition.Url{
   392  							Scheme: "data",
   393  							Opaque: ";base64," + coreosCloudInit,
   394  						},
   395  					},
   396  					Mode: 0755,
   397  				},
   398  			},
   399  		},
   400  		Systemd: ignition.Systemd{
   401  			Units: []ignition.SystemdUnit{
   402  				ignition.SystemdUnit{
   403  					// don't appear to be running while install is in progress
   404  					Name: "sshd.socket",
   405  					Mask: true,
   406  				},
   407  				ignition.SystemdUnit{
   408  					// future-proofing
   409  					Name: "sshd.service",
   410  					Mask: true,
   411  				},
   412  				ignition.SystemdUnit{
   413  					// allow remote detection of install in progress
   414  					Name:     "discard.socket",
   415  					Enable:   true,
   416  					Contents: discardSocketUnit,
   417  				},
   418  				ignition.SystemdUnit{
   419  					Name:     "discard@.service",
   420  					Contents: discardServiceUnit,
   421  				},
   422  				ignition.SystemdUnit{
   423  					Name:     "coreos-install.service",
   424  					Enable:   true,
   425  					Contents: installUnit,
   426  				},
   427  			},
   428  		},
   429  	})
   430  	if err != nil {
   431  		return "", fmt.Errorf("encoding Ignition config: %v", err)
   432  	}
   433  
   434  	return buf.String(), nil
   435  }
   436  
   437  func (a *API) uploadObject(hostname, contentType string, data []byte) (string, string, error) {
   438  	if hostname == "" {
   439  		hostname = "mantle"
   440  	}
   441  	b := make([]byte, 5)
   442  	rand.Read(b)
   443  	name := fmt.Sprintf("%s-%x", hostname, b)
   444  
   445  	obj := gs.Object{
   446  		Name:        a.bucket.Prefix() + name,
   447  		ContentType: contentType,
   448  	}
   449  	err := a.bucket.Upload(context.TODO(), &obj, bytes.NewReader(data))
   450  	if err != nil {
   451  		return "", "", fmt.Errorf("uploading object: %v", err)
   452  	}
   453  
   454  	// HTTPS causes iPXE to fail on a "permission denied" error
   455  	url := fmt.Sprintf("http://storage-download.googleapis.com/%v/%v", a.bucket.Name(), obj.Name)
   456  	return obj.Name, url, nil
   457  }
   458  
   459  func (a *API) ipxeScript(userdataURL string) string {
   460  	return fmt.Sprintf(`#!ipxe
   461  set base-url %s
   462  kernel ${base-url}/coreos_production_pxe.vmlinuz initrd=coreos_production_pxe_image.cpio.gz coreos.first_boot=1 coreos.config.url=%s console=%s
   463  initrd ${base-url}/coreos_production_pxe_image.cpio.gz
   464  boot`, strings.TrimRight(a.opts.InstallerImageBaseURL, "/"), userdataURL, linuxConsole[a.opts.Board])
   465  }
   466  
   467  // device creation seems a bit flaky, so try a few times
   468  func (a *API) createDevice(hostname, ipxeScriptURL string) (device *packngo.Device, err error) {
   469  	for tries := apiRetries; tries >= 0; tries-- {
   470  		var response *packngo.Response
   471  		device, response, err = a.c.Devices.Create(&packngo.DeviceCreateRequest{
   472  			ProjectID:     a.opts.Project,
   473  			Facility:      a.opts.Facility,
   474  			Plan:          a.opts.Plan,
   475  			BillingCycle:  "hourly",
   476  			Hostname:      hostname,
   477  			OS:            "custom_ipxe",
   478  			IPXEScriptURL: ipxeScriptURL,
   479  			Tags:          []string{"mantle"},
   480  		})
   481  		if err == nil || response.StatusCode != 500 {
   482  			return
   483  		}
   484  		if tries > 0 {
   485  			time.Sleep(apiRetryInterval)
   486  		}
   487  	}
   488  	return
   489  }
   490  
// startConsole attaches console to the device's console host over SSH. It
// blocks until the remote shell has been started, or until setting it up
// has failed, and returns the setup error if any. The session then keeps
// running in a background goroutine until it ends; console is closed by
// that goroutine in every case.
func (a *API) startConsole(deviceID string, console Console) error {
	// Unbuffered: exactly one value is sent, either nil once the shell is
	// up or the setup error, and it is received by the return below.
	ready := make(chan error)

	runner := func() error {
		defer console.Close()

		// The console host is sos.<facility>.packet.net and the device ID
		// is passed as the SSH user name.
		client, err := console.SSHClient("sos."+a.opts.Facility+".packet.net", deviceID)
		if err != nil {
			return fmt.Errorf("couldn't create SSH client for %s console: %v", deviceID, err)
		}
		defer client.Close()

		session, err := client.NewSession()
		if err != nil {
			return fmt.Errorf("couldn't create SSH session for %s console: %v", deviceID, err)
		}
		defer session.Close()

		// Stdin is a pipe that is never written to; its write end is only
		// closed when runner returns.
		reader, writer := io.Pipe()
		defer writer.Close()

		session.Stdin = reader
		session.Stdout = console
		if err := session.Shell(); err != nil {
			return fmt.Errorf("couldn't start shell for %s console: %v", deviceID, err)
		}

		// cause startConsole to return
		ready <- nil

		// Failures after this point can no longer be reported to the
		// caller, so they are logged. A missing exit status is not
		// treated as a failure.
		err = session.Wait()
		_, ok := err.(*ssh.ExitMissingError)
		if err != nil && !ok {
			plog.Errorf("%s console session failed: %v", deviceID, err)
		}
		return nil
	}
	go func() {
		// A non-nil error means runner failed before signalling ready.
		err := runner()
		if err != nil {
			ready <- err
		}
	}()

	return <-ready
}
   537  
   538  func (a *API) waitForActive(deviceID string) (*packngo.Device, error) {
   539  	var device *packngo.Device
   540  	err := util.WaitUntilReady(launchTimeout, launchPollInterval, func() (bool, error) {
   541  		var err error
   542  		device, _, err = a.c.Devices.Get(deviceID)
   543  		if err != nil {
   544  			return false, fmt.Errorf("querying device: %v", err)
   545  		}
   546  		return device.State == "active", nil
   547  	})
   548  	if err != nil {
   549  		return nil, err
   550  	}
   551  	return device, nil
   552  }
   553  
   554  // Connect to the discard port and wait for the connection to close,
   555  // indicating that install is complete.
   556  func waitForInstall(address string) (err error) {
   557  	deadline := time.Now().Add(installTimeout)
   558  	dialer := net.Dialer{
   559  		Timeout: installPollInterval,
   560  	}
   561  	for tries := installTimeout / installPollInterval; tries >= 0; tries-- {
   562  		var conn net.Conn
   563  		start := time.Now()
   564  		conn, err = dialer.Dial("tcp", address+":9")
   565  		if err == nil {
   566  			defer conn.Close()
   567  			conn.SetDeadline(deadline)
   568  			_, err = conn.Read([]byte{0})
   569  			if err == io.EOF {
   570  				err = nil
   571  			}
   572  			return
   573  		}
   574  		if tries > 0 {
   575  			// If Dial returned an error before the timeout,
   576  			// e.g. because the device returned ECONNREFUSED,
   577  			// wait out the rest of the interval.
   578  			time.Sleep(installPollInterval - time.Now().Sub(start))
   579  		}
   580  	}
   581  	return
   582  }
   583  
   584  func (a *API) GC(gracePeriod time.Duration) error {
   585  	threshold := time.Now().Add(-gracePeriod)
   586  
   587  	page := packngo.ListOptions{
   588  		Page:    1,
   589  		PerPage: 1000,
   590  	}
   591  
   592  	for {
   593  		devices, _, err := a.c.Devices.List(a.opts.Project, &page)
   594  		if err != nil {
   595  			return fmt.Errorf("listing devices: %v", err)
   596  		}
   597  		for _, device := range devices {
   598  			tagged := false
   599  			for _, tag := range device.Tags {
   600  				if tag == "mantle" {
   601  					tagged = true
   602  					break
   603  				}
   604  			}
   605  			if !tagged {
   606  				continue
   607  			}
   608  
   609  			switch device.State {
   610  			case "queued", "provisioning":
   611  				continue
   612  			}
   613  
   614  			if device.Locked {
   615  				continue
   616  			}
   617  
   618  			created, err := time.Parse(time.RFC3339, device.Created)
   619  			if err != nil {
   620  				return fmt.Errorf("couldn't parse %q: %v", device.Created, err)
   621  			}
   622  			if created.After(threshold) {
   623  				continue
   624  			}
   625  
   626  			if err := a.DeleteDevice(device.ID); err != nil {
   627  				return fmt.Errorf("couldn't delete device %v: %v", device.ID, err)
   628  			}
   629  		}
   630  		if len(devices) < page.PerPage {
   631  			return nil
   632  		}
   633  		page.Page += 1
   634  	}
   635  }