github.com/coreos/mantle@v0.13.0/platform/api/packet/api.go (about) 1 // Copyright 2017 CoreOS, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package packet 16 17 import ( 18 "bytes" 19 "crypto/rand" 20 "encoding/base64" 21 "encoding/json" 22 "fmt" 23 "io" 24 "net" 25 "strings" 26 "time" 27 28 ignition "github.com/coreos/ignition/config/v2_0/types" 29 "github.com/coreos/pkg/capnslog" 30 "github.com/packethost/packngo" 31 "golang.org/x/crypto/ssh" 32 "golang.org/x/net/context" 33 gs "google.golang.org/api/storage/v1" 34 35 "github.com/coreos/mantle/auth" 36 "github.com/coreos/mantle/platform" 37 "github.com/coreos/mantle/platform/api/gcloud" 38 "github.com/coreos/mantle/platform/conf" 39 "github.com/coreos/mantle/storage" 40 "github.com/coreos/mantle/util" 41 ) 42 43 const ( 44 // Provisioning a VM is supposed to take < 8 minutes, but in practice can take longer. 45 launchTimeout = 10 * time.Minute 46 launchPollInterval = 30 * time.Second 47 installTimeout = 15 * time.Minute 48 installPollInterval = 5 * time.Second 49 apiRetries = 3 50 apiRetryInterval = 5 * time.Second 51 ) 52 53 var ( 54 plog = capnslog.NewPackageLogger("github.com/coreos/mantle", "platform/api/packet") 55 56 defaultInstallerImageBaseURL = map[string]string{ 57 // HTTPS causes iPXE to fail on a "permission denied" error 58 "amd64-usr": "http://stable.release.core-os.net/amd64-usr/current", 59 "arm64-usr": "http://beta.release.core-os.net/arm64-usr/current", 60 } 61 defaultImageURL = map[string]string{ 62 "amd64-usr": "https://alpha.release.core-os.net/amd64-usr/current/coreos_production_packet_image.bin.bz2", 63 "arm64-usr": "https://alpha.release.core-os.net/arm64-usr/current/coreos_production_packet_image.bin.bz2", 64 } 65 defaultPlan = map[string]string{ 66 "amd64-usr": "baremetal_0", 67 "arm64-usr": "baremetal_2a", 68 } 69 linuxConsole = map[string]string{ 70 "amd64-usr": "ttyS1,115200", 71 "arm64-usr": "ttyAMA0,115200", 72 } 73 ) 74 75 type Options struct { 76 *platform.Options 77 78 // Config file. Defaults to $HOME/.config/packet.json. 79 ConfigPath string 80 // Profile name 81 Profile string 82 // API key (overrides config profile) 83 ApiKey string 84 // Project UUID (overrides config profile) 85 Project string 86 87 // Packet location code 88 Facility string 89 // Slug of the device type (e.g. "baremetal_0") 90 Plan string 91 // The Container Linux board name 92 Board string 93 // e.g. http://alpha.release.core-os.net/amd64-usr/current 94 InstallerImageBaseURL string 95 // e.g. https://alpha.release.core-os.net/amd64-usr/current/coreos_production_packet_image.bin.bz2 96 ImageURL string 97 98 // Options for Google Storage 99 GSOptions *gcloud.Options 100 // Google Storage base URL for temporary uploads 101 // e.g. gs://users.developer.core-os.net/bovik/mantle 102 StorageURL string 103 } 104 105 type API struct { 106 c *packngo.Client 107 bucket *storage.Bucket 108 opts *Options 109 } 110 111 type Console interface { 112 io.WriteCloser 113 SSHClient(ip, user string) (*ssh.Client, error) 114 } 115 116 func New(opts *Options) (*API, error) { 117 if opts.ApiKey == "" || opts.Project == "" { 118 profiles, err := auth.ReadPacketConfig(opts.ConfigPath) 119 if err != nil { 120 return nil, fmt.Errorf("couldn't read Packet config: %v", err) 121 } 122 123 if opts.Profile == "" { 124 opts.Profile = "default" 125 } 126 profile, ok := profiles[opts.Profile] 127 if !ok { 128 return nil, fmt.Errorf("no such profile %q", opts.Profile) 129 } 130 if opts.ApiKey == "" { 131 opts.ApiKey = profile.ApiKey 132 } 133 if opts.Project == "" { 134 opts.Project = profile.Project 135 } 136 } 137 138 _, ok := linuxConsole[opts.Board] 139 if !ok { 140 return nil, fmt.Errorf("unknown board %q", opts.Board) 141 } 142 if opts.Plan == "" { 143 opts.Plan = defaultPlan[opts.Board] 144 } 145 if opts.InstallerImageBaseURL == "" { 146 opts.InstallerImageBaseURL = defaultInstallerImageBaseURL[opts.Board] 147 } 148 if opts.ImageURL == "" { 149 opts.ImageURL = defaultImageURL[opts.Board] 150 } 151 152 gapi, err := gcloud.New(opts.GSOptions) 153 if err != nil { 154 return nil, fmt.Errorf("connecting to Google Storage: %v", err) 155 } 156 bucket, err := storage.NewBucket(gapi.Client(), opts.StorageURL) 157 if err != nil { 158 return nil, fmt.Errorf("connecting to Google Storage bucket: %v", err) 159 } 160 161 client := packngo.NewClient("github.com/coreos/mantle", opts.ApiKey, nil) 162 163 return &API{ 164 c: client, 165 bucket: bucket, 166 opts: opts, 167 }, nil 168 } 169 170 func (a *API) PreflightCheck() error { 171 _, _, err := a.c.Projects.Get(a.opts.Project) 172 if err != nil { 173 return fmt.Errorf("querying project %v: %v", a.opts.Project, err) 174 } 175 return nil 176 } 177 178 // console is optional, and is closed on error or when the device is deleted. 179 func (a *API) CreateDevice(hostname string, conf *conf.Conf, console Console) (*packngo.Device, error) { 180 consoleStarted := false 181 defer func() { 182 if console != nil && !consoleStarted { 183 console.Close() 184 } 185 }() 186 187 userdata, err := a.wrapUserData(conf) 188 if err != nil { 189 return nil, err 190 } 191 192 // The Ignition config can't go in userdata via coreos.config.url=https://metadata.packet.net/userdata because Ignition supplies an Accept header that metadata.packet.net finds 406 Not Acceptable. 193 // It can't go in userdata via coreos.oem.id=packet because the Packet OEM expects unit files in /usr/share/oem which the PXE image doesn't have. 194 userdataName, userdataURL, err := a.uploadObject(hostname, "application/vnd.coreos.ignition+json", []byte(userdata)) 195 if err != nil { 196 return nil, err 197 } 198 defer a.bucket.Delete(context.TODO(), userdataName) 199 200 // This can't go in userdata because the installed coreos-cloudinit will try to execute it. 201 ipxeScriptName, ipxeScriptURL, err := a.uploadObject(hostname, "application/octet-stream", []byte(a.ipxeScript(userdataURL))) 202 if err != nil { 203 return nil, err 204 } 205 defer a.bucket.Delete(context.TODO(), ipxeScriptName) 206 207 device, err := a.createDevice(hostname, ipxeScriptURL) 208 if err != nil { 209 return nil, fmt.Errorf("couldn't create device: %v", err) 210 } 211 deviceID := device.ID 212 213 if console != nil { 214 err := a.startConsole(deviceID, console) 215 consoleStarted = true 216 if err != nil { 217 a.DeleteDevice(deviceID) 218 return nil, err 219 } 220 } 221 222 device, err = a.waitForActive(deviceID) 223 if err != nil { 224 a.DeleteDevice(deviceID) 225 return nil, err 226 } 227 228 ipAddress := a.GetDeviceAddress(device, 4, true) 229 if ipAddress == "" { 230 a.DeleteDevice(deviceID) 231 return nil, fmt.Errorf("no public IP address found for %v", deviceID) 232 } 233 234 err = waitForInstall(ipAddress) 235 if err != nil { 236 a.DeleteDevice(deviceID) 237 return nil, fmt.Errorf("timed out waiting for coreos-install: %v", err) 238 } 239 240 return device, nil 241 } 242 243 func (a *API) DeleteDevice(deviceID string) error { 244 _, err := a.c.Devices.Delete(deviceID) 245 if err != nil { 246 return fmt.Errorf("deleting device %q: %v", deviceID, err) 247 } 248 return nil 249 } 250 251 func (a *API) GetDeviceAddress(device *packngo.Device, family int, public bool) string { 252 for _, address := range device.Network { 253 if address.AddressFamily == family && address.Public == public { 254 return address.Address 255 } 256 } 257 return "" 258 } 259 260 func (a *API) AddKey(name, key string) (string, error) { 261 sshKey, _, err := a.c.SSHKeys.Create(&packngo.SSHKeyCreateRequest{ 262 Label: name, 263 Key: key, 264 }) 265 if err != nil { 266 return "", fmt.Errorf("couldn't create SSH key: %v", err) 267 } 268 return sshKey.ID, nil 269 } 270 271 func (a *API) DeleteKey(keyID string) error { 272 _, err := a.c.SSHKeys.Delete(keyID) 273 if err != nil { 274 return fmt.Errorf("couldn't delete SSH key: %v", err) 275 } 276 return nil 277 } 278 279 func (a *API) ListKeys() ([]packngo.SSHKey, error) { 280 keys, _, err := a.c.SSHKeys.List() 281 if err != nil { 282 return nil, fmt.Errorf("couldn't list SSH keys: %v", err) 283 } 284 return keys, nil 285 } 286 287 func (a *API) wrapUserData(conf *conf.Conf) (string, error) { 288 userDataOption := "-i" 289 if !conf.IsIgnition() && conf.String() != "" { 290 // By providing a no-op Ignition config, we prevent Ignition 291 // from enabling oem-cloudinit.service, which is unordered 292 // with respect to the cloud-config installed by the -c 293 // option. Otherwise it might override settings in the 294 // cloud-config with defaults obtained from the Packet 295 // metadata endpoint. 296 userDataOption = "-i /noop.ign -c" 297 } 298 escapedImageURL := strings.Replace(a.opts.ImageURL, "%", "%%", -1) 299 300 // make systemd units 301 discardSocketUnit := ` 302 [Unit] 303 Description=Discard Socket 304 305 [Socket] 306 ListenStream=0.0.0.0:9 307 Accept=true 308 309 [Install] 310 WantedBy=multi-user.target 311 ` 312 discardServiceUnit := ` 313 [Unit] 314 Description=Discard Service 315 Requires=discard.socket 316 317 [Service] 318 ExecStart=/usr/bin/cat 319 StandardInput=socket 320 StandardOutput=null 321 ` 322 installUnit := fmt.Sprintf(` 323 [Unit] 324 Description=Install Container Linux 325 326 Requires=network-online.target 327 After=network-online.target 328 329 Requires=dev-sda.device 330 After=dev-sda.device 331 332 [Service] 333 Type=oneshot 334 # Prevent coreos-install from validating cloud-config 335 Environment=PATH=/root/bin:/usr/sbin:/usr/bin 336 337 ExecStart=/usr/bin/curl -fo image.bin.bz2 "%v" 338 # We don't verify signatures because the iPXE script isn't verified either 339 # (and, in fact, is transferred over HTTP) 340 341 ExecStart=/usr/bin/coreos-install -d /dev/sda -f image.bin.bz2 %v /userdata 342 343 ExecStart=/usr/bin/systemctl --no-block isolate reboot.target 344 345 StandardOutput=journal+console 346 StandardError=journal+console 347 348 [Install] 349 RequiredBy=multi-user.target 350 `, escapedImageURL, userDataOption) 351 352 // make workarounds 353 noopIgnitionConfig := base64.StdEncoding.EncodeToString([]byte(`{"ignition": {"version": "2.1.0"}}`)) 354 coreosCloudInit := base64.StdEncoding.EncodeToString([]byte("#!/bin/sh\nexit 0")) 355 356 // make Ignition config 357 b64UserData := base64.StdEncoding.EncodeToString(conf.Bytes()) 358 var buf bytes.Buffer 359 err := json.NewEncoder(&buf).Encode(ignition.Config{ 360 Ignition: ignition.Ignition{ 361 Version: ignition.IgnitionVersion{Major: 2}, 362 }, 363 Storage: ignition.Storage{ 364 Files: []ignition.File{ 365 ignition.File{ 366 Filesystem: "root", 367 Path: "/userdata", 368 Contents: ignition.FileContents{ 369 Source: ignition.Url{ 370 Scheme: "data", 371 Opaque: ";base64," + b64UserData, 372 }, 373 }, 374 Mode: 0644, 375 }, 376 ignition.File{ 377 Filesystem: "root", 378 Path: "/noop.ign", 379 Contents: ignition.FileContents{ 380 Source: ignition.Url{ 381 Scheme: "data", 382 Opaque: ";base64," + noopIgnitionConfig, 383 }, 384 }, 385 Mode: 0644, 386 }, 387 ignition.File{ 388 Filesystem: "root", 389 Path: "/root/bin/coreos-cloudinit", 390 Contents: ignition.FileContents{ 391 Source: ignition.Url{ 392 Scheme: "data", 393 Opaque: ";base64," + coreosCloudInit, 394 }, 395 }, 396 Mode: 0755, 397 }, 398 }, 399 }, 400 Systemd: ignition.Systemd{ 401 Units: []ignition.SystemdUnit{ 402 ignition.SystemdUnit{ 403 // don't appear to be running while install is in progress 404 Name: "sshd.socket", 405 Mask: true, 406 }, 407 ignition.SystemdUnit{ 408 // future-proofing 409 Name: "sshd.service", 410 Mask: true, 411 }, 412 ignition.SystemdUnit{ 413 // allow remote detection of install in progress 414 Name: "discard.socket", 415 Enable: true, 416 Contents: discardSocketUnit, 417 }, 418 ignition.SystemdUnit{ 419 Name: "discard@.service", 420 Contents: discardServiceUnit, 421 }, 422 ignition.SystemdUnit{ 423 Name: "coreos-install.service", 424 Enable: true, 425 Contents: installUnit, 426 }, 427 }, 428 }, 429 }) 430 if err != nil { 431 return "", fmt.Errorf("encoding Ignition config: %v", err) 432 } 433 434 return buf.String(), nil 435 } 436 437 func (a *API) uploadObject(hostname, contentType string, data []byte) (string, string, error) { 438 if hostname == "" { 439 hostname = "mantle" 440 } 441 b := make([]byte, 5) 442 rand.Read(b) 443 name := fmt.Sprintf("%s-%x", hostname, b) 444 445 obj := gs.Object{ 446 Name: a.bucket.Prefix() + name, 447 ContentType: contentType, 448 } 449 err := a.bucket.Upload(context.TODO(), &obj, bytes.NewReader(data)) 450 if err != nil { 451 return "", "", fmt.Errorf("uploading object: %v", err) 452 } 453 454 // HTTPS causes iPXE to fail on a "permission denied" error 455 url := fmt.Sprintf("http://storage-download.googleapis.com/%v/%v", a.bucket.Name(), obj.Name) 456 return obj.Name, url, nil 457 } 458 459 func (a *API) ipxeScript(userdataURL string) string { 460 return fmt.Sprintf(`#!ipxe 461 set base-url %s 462 kernel ${base-url}/coreos_production_pxe.vmlinuz initrd=coreos_production_pxe_image.cpio.gz coreos.first_boot=1 coreos.config.url=%s console=%s 463 initrd ${base-url}/coreos_production_pxe_image.cpio.gz 464 boot`, strings.TrimRight(a.opts.InstallerImageBaseURL, "/"), userdataURL, linuxConsole[a.opts.Board]) 465 } 466 467 // device creation seems a bit flaky, so try a few times 468 func (a *API) createDevice(hostname, ipxeScriptURL string) (device *packngo.Device, err error) { 469 for tries := apiRetries; tries >= 0; tries-- { 470 var response *packngo.Response 471 device, response, err = a.c.Devices.Create(&packngo.DeviceCreateRequest{ 472 ProjectID: a.opts.Project, 473 Facility: a.opts.Facility, 474 Plan: a.opts.Plan, 475 BillingCycle: "hourly", 476 Hostname: hostname, 477 OS: "custom_ipxe", 478 IPXEScriptURL: ipxeScriptURL, 479 Tags: []string{"mantle"}, 480 }) 481 if err == nil || response.StatusCode != 500 { 482 return 483 } 484 if tries > 0 { 485 time.Sleep(apiRetryInterval) 486 } 487 } 488 return 489 } 490 491 func (a *API) startConsole(deviceID string, console Console) error { 492 ready := make(chan error) 493 494 runner := func() error { 495 defer console.Close() 496 497 client, err := console.SSHClient("sos."+a.opts.Facility+".packet.net", deviceID) 498 if err != nil { 499 return fmt.Errorf("couldn't create SSH client for %s console: %v", deviceID, err) 500 } 501 defer client.Close() 502 503 session, err := client.NewSession() 504 if err != nil { 505 return fmt.Errorf("couldn't create SSH session for %s console: %v", deviceID, err) 506 } 507 defer session.Close() 508 509 reader, writer := io.Pipe() 510 defer writer.Close() 511 512 session.Stdin = reader 513 session.Stdout = console 514 if err := session.Shell(); err != nil { 515 return fmt.Errorf("couldn't start shell for %s console: %v", deviceID, err) 516 } 517 518 // cause startConsole to return 519 ready <- nil 520 521 err = session.Wait() 522 _, ok := err.(*ssh.ExitMissingError) 523 if err != nil && !ok { 524 plog.Errorf("%s console session failed: %v", deviceID, err) 525 } 526 return nil 527 } 528 go func() { 529 err := runner() 530 if err != nil { 531 ready <- err 532 } 533 }() 534 535 return <-ready 536 } 537 538 func (a *API) waitForActive(deviceID string) (*packngo.Device, error) { 539 var device *packngo.Device 540 err := util.WaitUntilReady(launchTimeout, launchPollInterval, func() (bool, error) { 541 var err error 542 device, _, err = a.c.Devices.Get(deviceID) 543 if err != nil { 544 return false, fmt.Errorf("querying device: %v", err) 545 } 546 return device.State == "active", nil 547 }) 548 if err != nil { 549 return nil, err 550 } 551 return device, nil 552 } 553 554 // Connect to the discard port and wait for the connection to close, 555 // indicating that install is complete. 556 func waitForInstall(address string) (err error) { 557 deadline := time.Now().Add(installTimeout) 558 dialer := net.Dialer{ 559 Timeout: installPollInterval, 560 } 561 for tries := installTimeout / installPollInterval; tries >= 0; tries-- { 562 var conn net.Conn 563 start := time.Now() 564 conn, err = dialer.Dial("tcp", address+":9") 565 if err == nil { 566 defer conn.Close() 567 conn.SetDeadline(deadline) 568 _, err = conn.Read([]byte{0}) 569 if err == io.EOF { 570 err = nil 571 } 572 return 573 } 574 if tries > 0 { 575 // If Dial returned an error before the timeout, 576 // e.g. because the device returned ECONNREFUSED, 577 // wait out the rest of the interval. 578 time.Sleep(installPollInterval - time.Now().Sub(start)) 579 } 580 } 581 return 582 } 583 584 func (a *API) GC(gracePeriod time.Duration) error { 585 threshold := time.Now().Add(-gracePeriod) 586 587 page := packngo.ListOptions{ 588 Page: 1, 589 PerPage: 1000, 590 } 591 592 for { 593 devices, _, err := a.c.Devices.List(a.opts.Project, &page) 594 if err != nil { 595 return fmt.Errorf("listing devices: %v", err) 596 } 597 for _, device := range devices { 598 tagged := false 599 for _, tag := range device.Tags { 600 if tag == "mantle" { 601 tagged = true 602 break 603 } 604 } 605 if !tagged { 606 continue 607 } 608 609 switch device.State { 610 case "queued", "provisioning": 611 continue 612 } 613 614 if device.Locked { 615 continue 616 } 617 618 created, err := time.Parse(time.RFC3339, device.Created) 619 if err != nil { 620 return fmt.Errorf("couldn't parse %q: %v", device.Created, err) 621 } 622 if created.After(threshold) { 623 continue 624 } 625 626 if err := a.DeleteDevice(device.ID); err != nil { 627 return fmt.Errorf("couldn't delete device %v: %v", device.ID, err) 628 } 629 } 630 if len(devices) < page.PerPage { 631 return nil 632 } 633 page.Page += 1 634 } 635 }