k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/test/e2e_node/remote/gce/gce_runner.go (about) 1 /* 2 Copyright 2023 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package gce 18 19 import ( 20 "encoding/base64" 21 "encoding/json" 22 "errors" 23 "flag" 24 "fmt" 25 "os" 26 "path/filepath" 27 "regexp" 28 "sort" 29 "strings" 30 "time" 31 32 "k8s.io/kubernetes/test/e2e_node/remote" 33 34 "github.com/google/uuid" 35 "k8s.io/apimachinery/pkg/util/wait" 36 "k8s.io/klog/v2" 37 "sigs.k8s.io/yaml" 38 ) 39 40 var _ remote.Runner = (*GCERunner)(nil) 41 42 func init() { 43 remote.RegisterRunner("gce", NewGCERunner) 44 } 45 46 // envs is the type used to collect all node envs. The key is the env name, 47 // and the value is the env value 48 type envs map[string]string 49 50 // String function of flag.Value 51 func (e *envs) String() string { 52 return fmt.Sprint(*e) 53 } 54 55 // Set function of flag.Value 56 func (e *envs) Set(value string) error { 57 if value == "" { 58 return nil 59 } 60 kv := strings.SplitN(value, "=", 2) 61 if len(kv) != 2 { 62 return fmt.Errorf("invalid env string %s", value) 63 } 64 emap := *e 65 emap[kv[0]] = kv[1] 66 return nil 67 } 68 69 // nodeEnvs is the node envs from the flag `node-env`. 
70 var nodeEnvs = make(envs) 71 72 var project = flag.String("project", "", "gce project the hosts live in (gce)") 73 var zone = flag.String("zone", "", "gce zone that the hosts live in (gce)") 74 var instanceMetadata = flag.String("instance-metadata", "", "key/value metadata for instances separated by '=' or '<', 'k=v' means the key is 'k' and the value is 'v'; 'k<p' means the key is 'k' and the value is extracted from the local path 'p', e.g. k1=v1,k2<p2 (gce)") 75 var imageProject = flag.String("image-project", "", "gce project the hosts live in (gce)") 76 var instanceType = flag.String("instance-type", "e2-medium", "GCP Machine type to use for test") 77 var preemptibleInstances = flag.Bool("preemptible-instances", false, "If true, gce instances will be configured to be preemptible (gce)") 78 79 func init() { 80 flag.Var(&nodeEnvs, "node-env", "An environment variable passed to instance as metadata, e.g. when '--node-env=PATH=/usr/bin' is specified, there will be an extra instance metadata 'PATH=/usr/bin'.") 81 } 82 83 type GCERunner struct { 84 cfg remote.Config 85 gceImages *internalGCEImageConfig 86 } 87 88 const ( 89 defaultGCEMachine = "e2-standard-2" 90 ) 91 92 func NewGCERunner(cfg remote.Config) remote.Runner { 93 if cfg.InstanceNamePrefix == "" { 94 cfg.InstanceNamePrefix = "tmp-node-e2e-" + uuid.New().String()[:8] 95 } 96 return &GCERunner{cfg: cfg} 97 } 98 99 func (g *GCERunner) Validate() error { 100 if len(g.cfg.Hosts) == 0 && g.cfg.ImageConfigFile == "" && len(g.cfg.Images) == 0 { 101 klog.Fatalf("Must specify one of --image-config-file, --hosts, --images.") 102 } 103 104 _, err := runGCPCommandWithZones("compute", "instances", "list") 105 if err != nil { 106 klog.Fatalf("While listing GCE instances: %v", err) 107 } 108 109 if g.gceImages, err = g.prepareGceImages(); err != nil { 110 klog.Fatalf("While preparing GCE images: %v", err) 111 } 112 return nil 113 } 114 115 func (g *GCERunner) StartTests(suite remote.TestSuite, archivePath string, 
results chan *remote.TestResult) (numTests int) { 116 for shortName := range g.gceImages.images { 117 imageConfig := g.gceImages.images[shortName] 118 numTests++ 119 fmt.Printf("Initializing e2e tests using image %s/%s/%s.\n", shortName, imageConfig.project, imageConfig.image) 120 go func(image *internalGCEImage, junitFileName string) { 121 results <- g.testGCEImage(suite, archivePath, image, junitFileName) 122 }(&imageConfig, shortName) 123 } 124 return 125 } 126 127 // Accelerator contains type and count about resource. 128 type Accelerator struct { 129 Type string `json:"type,omitempty"` 130 Count int64 `json:"count,omitempty"` 131 } 132 133 // Resources contains accelerators array. 134 type Resources struct { 135 Accelerators []Accelerator `json:"accelerators,omitempty"` 136 } 137 138 // internalGCEImage is an internal GCE image representation for E2E node. 139 type internalGCEImage struct { 140 image string 141 // imageDesc is the description of the image. If empty, the value in the 142 // 'image' will be used. 143 imageDesc string 144 kernelArguments []string 145 project string 146 resources Resources 147 metadata *gceMetadata 148 machine string 149 } 150 151 type internalGCEImageConfig struct { 152 images map[string]internalGCEImage 153 } 154 155 // GCEImageConfig specifies what images should be run and how for these tests. 
// It can be created via the `--images` and `--image-project` flags, or by
// specifying the `--image-config-file` flag, pointing to a json or yaml file
// of the form:
//
//	images:
//	  short-name:
//	    image: gce-image-name
//	    project: gce-image-project
//	    machine: for benchmark only, the machine type (GCE instance) to run test
//	    tests: for benchmark only, a list of ginkgo focus strings to match tests
//
// NOTE(review): GCEImage declares no field for the `tests` key shown above;
// confirm whether that key is still honored anywhere.
//
// TODO(coufon): replace 'image' with 'node' in configurations
// and we plan to support testing custom machines other than GCE by specifying Host
type GCEImageConfig struct {
	// Images maps a short name to the image definition to test.
	Images map[string]GCEImage `json:"images"`
}

// GCEImage contains some information about GCE Image.
type GCEImage struct {
	// Image is an explicit image name. When set it is used as-is and
	// ImageRegex/ImageFamily are not consulted.
	Image string `json:"image,omitempty"`
	// ImageRegex is a regular expression matched against image names in
	// Project; it is only used when Image is empty.
	ImageRegex string `json:"image_regex,omitempty"`
	// ImageFamily is the image family to use. The latest image from the image family will be used, e.g cos-81-lts.
	ImageFamily string `json:"image_family,omitempty"`
	// ImageDesc is a description of the image; when empty the resolved image
	// name is used instead.
	ImageDesc string `json:"image_description,omitempty"`
	// KernelArguments are additional kernel arguments applied to the instance
	// once it is running.
	KernelArguments []string `json:"kernel_arguments,omitempty"`
	// Project is the GCE project that contains the image. It must be set.
	Project string `json:"project"`
	// Metadata is a comma-separated list of instance metadata entries, each
	// either 'k=v' or 'k<path' (value read from the local file at path).
	Metadata string `json:"metadata"`
	// Machine is the machine type for the instance. When empty, the
	// --instance-type flag value is used.
	Machine string `json:"machine,omitempty"`
	// Resources lists the accelerators to attach to the instance.
	Resources Resources `json:"resources,omitempty"`
}

// Returns an image name based on regex and given GCE project.
188 func (g *GCERunner) getGCEImage(imageRegex, imageFamily string, project string) (string, error) { 189 data, err := runGCPCommandNoProject("compute", "images", "list", 190 "--format=json", "--project="+project) 191 if err != nil { 192 return "", fmt.Errorf("failed to list images in project %q: %w", project, err) 193 } 194 var images []gceImage 195 err = json.Unmarshal(data, &images) 196 if err != nil { 197 return "", fmt.Errorf("failed to parse images: %w", err) 198 } 199 200 imageObjs := []imageObj{} 201 imageRe := regexp.MustCompile(imageRegex) 202 for _, instance := range images { 203 if imageRegex != "" && !imageRe.MatchString(instance.Name) { 204 continue 205 } 206 if imageFamily != "" && instance.Family != imageFamily { 207 continue 208 } 209 creationTime, err := time.Parse(time.RFC3339, instance.CreationTimestamp) 210 if err != nil { 211 return "", fmt.Errorf("failed to parse instance creation timestamp %q: %w", instance.CreationTimestamp, err) 212 } 213 io := imageObj{ 214 creationTime: creationTime, 215 name: instance.Name, 216 } 217 imageObjs = append(imageObjs, io) 218 } 219 220 // Pick the latest image after sorting. 221 sort.Sort(byCreationTime(imageObjs)) 222 if len(imageObjs) > 0 { 223 klog.V(4).Infof("found images %+v based on regex %q and family %q in project %q", imageObjs, imageRegex, imageFamily, project) 224 return imageObjs[0].name, nil 225 } 226 return "", fmt.Errorf("found zero images based on regex %q and family %q in project %q", imageRegex, imageFamily, project) 227 } 228 229 func (g *GCERunner) prepareGceImages() (*internalGCEImageConfig, error) { 230 gceImages := &internalGCEImageConfig{ 231 images: make(map[string]internalGCEImage), 232 } 233 234 // Parse images from given config file and convert them to internalGCEImage. 
235 if g.cfg.ImageConfigFile != "" { 236 configPath := g.cfg.ImageConfigFile 237 if g.cfg.ImageConfigDir != "" { 238 configPath = filepath.Join(g.cfg.ImageConfigDir, g.cfg.ImageConfigFile) 239 } 240 241 imageConfigData, err := os.ReadFile(configPath) 242 if err != nil { 243 return nil, fmt.Errorf("Could not read image config file provided: %w", err) 244 } 245 // Unmarshal the given image config file. All images for this test run will be organized into a map. 246 // shortName->GCEImage, e.g cos-stable->cos-stable-81-12871-103-0. 247 externalImageConfig := GCEImageConfig{Images: make(map[string]GCEImage)} 248 err = yaml.Unmarshal(imageConfigData, &externalImageConfig) 249 if err != nil { 250 return nil, fmt.Errorf("Could not parse image config file: %w", err) 251 } 252 253 for shortName, imageConfig := range externalImageConfig.Images { 254 var image string 255 if (imageConfig.ImageRegex != "" || imageConfig.ImageFamily != "") && imageConfig.Image == "" { 256 image, err = g.getGCEImage(imageConfig.ImageRegex, imageConfig.ImageFamily, imageConfig.Project) 257 if err != nil { 258 return nil, fmt.Errorf("Could not retrieve a image based on image regex %q and family %q: %v", 259 imageConfig.ImageRegex, imageConfig.ImageFamily, err) 260 } 261 } else { 262 image = imageConfig.Image 263 } 264 // Convert the given image into an internalGCEImage. 
265 metadata := imageConfig.Metadata 266 if len(strings.TrimSpace(*instanceMetadata)) > 0 { 267 metadata += "," + *instanceMetadata 268 } 269 gceImage := internalGCEImage{ 270 image: image, 271 imageDesc: imageConfig.ImageDesc, 272 project: imageConfig.Project, 273 metadata: g.getImageMetadata(metadata), 274 kernelArguments: imageConfig.KernelArguments, 275 machine: imageConfig.Machine, 276 resources: imageConfig.Resources, 277 } 278 if gceImage.imageDesc == "" { 279 gceImage.imageDesc = gceImage.image 280 } 281 gceImages.images[shortName] = gceImage 282 } 283 } 284 285 // Allow users to specify additional images via cli flags for local testing 286 // convenience; merge in with config file 287 if len(g.cfg.Images) > 0 { 288 if *imageProject == "" { 289 klog.Fatal("Must specify --image-project if you specify --images") 290 } 291 for _, image := range g.cfg.Images { 292 gceImage := internalGCEImage{ 293 image: image, 294 project: *imageProject, 295 metadata: g.getImageMetadata(*instanceMetadata), 296 } 297 gceImages.images[image] = gceImage 298 } 299 } 300 301 if len(gceImages.images) != 0 && *zone == "" { 302 return nil, errors.New("must specify --zone flag") 303 } 304 // Make sure GCP project is set. Without a project, images can't be retrieved.. 
305 for shortName, imageConfig := range gceImages.images { 306 if imageConfig.project == "" { 307 return nil, fmt.Errorf("invalid config for %v; must specify a project", shortName) 308 } 309 } 310 if len(gceImages.images) != 0 { 311 if *project == "" { 312 return nil, errors.New("must specify --project flag to launch images into") 313 } 314 } 315 316 return gceImages, nil 317 } 318 319 type imageObj struct { 320 creationTime time.Time 321 name string 322 } 323 324 type byCreationTime []imageObj 325 326 func (a byCreationTime) Len() int { return len(a) } 327 func (a byCreationTime) Less(i, j int) bool { return a[i].creationTime.After(a[j].creationTime) } 328 func (a byCreationTime) Swap(i, j int) { a[i], a[j] = a[j], a[i] } 329 330 func (g *GCERunner) getImageMetadata(input string) *gceMetadata { 331 if input == "" { 332 return nil 333 } 334 klog.V(3).Infof("parsing instance metadata: %q", input) 335 raw := g.parseInstanceMetadata(input) 336 klog.V(4).Infof("parsed instance metadata: %v", raw) 337 metadataItems := []gceMetadataItems{} 338 for k, v := range raw { 339 metadataItems = append(metadataItems, gceMetadataItems{ 340 Key: k, 341 Value: v, 342 }) 343 } 344 ret := gceMetadata{Items: metadataItems} 345 return &ret 346 } 347 348 func (g *GCERunner) DeleteGCEInstance(host string) { 349 klog.Infof("Deleting instance %q", host) 350 _, err := runGCPCommandWithZone("compute", "instances", "delete", host) 351 if err != nil { 352 klog.Errorf("Error deleting instance %q: %v", host, err) 353 } 354 } 355 356 func (g *GCERunner) parseInstanceMetadata(str string) map[string]string { 357 metadata := make(map[string]string) 358 ss := strings.Split(str, ",") 359 for _, s := range ss { 360 kv := strings.Split(s, "=") 361 if len(kv) == 2 { 362 metadata[kv[0]] = kv[1] 363 continue 364 } 365 kp := strings.Split(s, "<") 366 if len(kp) != 2 { 367 klog.Fatalf("Invalid instance metadata: %q", s) 368 continue 369 } 370 metaPath := kp[1] 371 if g.cfg.ImageConfigDir != "" { 372 metaPath 
= filepath.Join(g.cfg.ImageConfigDir, metaPath) 373 } 374 v, err := os.ReadFile(metaPath) 375 if err != nil { 376 klog.Fatalf("Failed to read metadata file %q: %v", metaPath, err) 377 continue 378 } 379 metadata[kp[0]] = ignitionInjectGCEPublicKey(string(v)) 380 } 381 for k, v := range nodeEnvs { 382 metadata[k] = v 383 } 384 return metadata 385 } 386 387 // ignitionInjectGCEPublicKey tries to inject the GCE SSH public key into the 388 // provided ignition file path. 389 // 390 // This will only being done if the job has the 391 // IGNITION_INJECT_GCE_SSH_PUBLIC_KEY_FILE environment variable set, while it 392 // tried to replace the GCE_SSH_PUBLIC_KEY_FILE_CONTENT placeholder. 393 func ignitionInjectGCEPublicKey(content string) string { 394 if os.Getenv("IGNITION_INJECT_GCE_SSH_PUBLIC_KEY_FILE") == "" { 395 return content 396 } 397 398 klog.Infof("Injecting SSH public key into ignition") 399 400 const publicKeyEnv = "GCE_SSH_PUBLIC_KEY_FILE" 401 sshPublicKeyFile := os.Getenv(publicKeyEnv) 402 if sshPublicKeyFile == "" { 403 klog.Errorf("Environment variable %s is not set", publicKeyEnv) 404 os.Exit(1) 405 } 406 407 sshPublicKey, err := os.ReadFile(sshPublicKeyFile) 408 if err != nil { 409 klog.ErrorS(err, "unable to read SSH public key file") 410 os.Exit(1) 411 } 412 413 const sshPublicKeyFileContentMarker = "GCE_SSH_PUBLIC_KEY_FILE_CONTENT" 414 key := base64.StdEncoding.EncodeToString(sshPublicKey) 415 base64Marker := base64.StdEncoding.EncodeToString([]byte(sshPublicKeyFileContentMarker)) 416 replacer := strings.NewReplacer( 417 sshPublicKeyFileContentMarker, key, 418 base64Marker, key, 419 ) 420 return replacer.Replace(content) 421 } 422 423 // Provision a gce instance using image and run the tests in archive against the instance. 424 // Delete the instance afterward. 
425 func (g *GCERunner) testGCEImage(suite remote.TestSuite, archivePath string, imageConfig *internalGCEImage, junitFileName string) *remote.TestResult { 426 ginkgoFlagsStr := g.cfg.GinkgoFlags 427 428 host, err := g.createGCEInstance(imageConfig) 429 if g.cfg.DeleteInstances { 430 defer g.DeleteGCEInstance(host) 431 } 432 if err != nil { 433 return &remote.TestResult{ 434 Err: fmt.Errorf("unable to create gce instance with running docker daemon for image %s. %v", imageConfig.image, err), 435 } 436 } 437 438 // Only delete the files if we are keeping the instance and want it cleaned up. 439 // If we are going to delete the instance, don't bother with cleaning up the files 440 deleteFiles := !g.cfg.DeleteInstances && g.cfg.Cleanup 441 442 if err = g.registerGceHostIP(host); err != nil { 443 return &remote.TestResult{ 444 Err: err, 445 Host: host, 446 ExitOK: false, 447 } 448 } 449 450 output, exitOk, err := remote.RunRemote(remote.RunRemoteConfig{ 451 Suite: suite, 452 Archive: archivePath, 453 Host: host, 454 Cleanup: deleteFiles, 455 ImageDesc: imageConfig.imageDesc, 456 JunitFileName: junitFileName, 457 TestArgs: g.cfg.TestArgs, 458 GinkgoArgs: ginkgoFlagsStr, 459 SystemSpecName: g.cfg.SystemSpecName, 460 ExtraEnvs: g.cfg.ExtraEnvs, 461 RuntimeConfig: g.cfg.RuntimeConfig, 462 }) 463 result := remote.TestResult{ 464 Output: output, 465 Err: err, 466 Host: host, 467 ExitOK: exitOk, 468 } 469 470 // This is a temporary solution to collect serial node serial log. Only port 1 contains useful information. 471 // TODO(random-liu): Extract out and unify log collection logic with cluste e2e. 
472 contents, err := g.getSerialOutput(host) 473 if err != nil { 474 klog.Errorf("Failed to get serial Output from node %q : %v", host, err) 475 } 476 logFilename := "serial-1.log" 477 err = remote.WriteLog(host, logFilename, contents) 478 if err != nil { 479 klog.Errorf("Failed to write serial Output from node %q to %q: %v", host, logFilename, err) 480 } 481 return &result 482 } 483 484 // Provision a gce instance using image 485 func (g *GCERunner) createGCEInstance(imageConfig *internalGCEImage) (string, error) { 486 data, err := runGCPCommand("compute", "project-info", "describe", "--format=json", "--project="+*project) 487 if err != nil { 488 return "", fmt.Errorf("failed to get project info for %q: %w", *project, err) 489 } 490 491 var p projectInfo 492 err = json.Unmarshal(data, &p) 493 if err != nil { 494 return "", fmt.Errorf("failed parse project info %q: %w", *project, err) 495 } 496 // Use default service account 497 serviceAccount := p.DefaultServiceAccount 498 klog.V(1).Infof("Creating instance %+v with service account %q", *imageConfig, serviceAccount) 499 name := g.imageToInstanceName(imageConfig) 500 501 diskArgs := []string{ 502 "image-project=" + imageConfig.project, 503 "image=" + imageConfig.image, 504 "type=pd-standard", 505 "auto-delete=yes", 506 "boot=yes", 507 "size=20GB", 508 } 509 510 createArgs := []string{"compute", "instances", "create"} 511 createArgs = append(createArgs, name) 512 createArgs = append(createArgs, "--machine-type="+g.machineType(imageConfig.machine)) 513 createArgs = append(createArgs, "--create-disk="+strings.Join(diskArgs, ",")) 514 createArgs = append(createArgs, "--service-account="+serviceAccount) 515 if *preemptibleInstances { 516 createArgs = append(createArgs, "--preemptible") 517 } 518 if len(imageConfig.resources.Accelerators) > 0 { 519 createArgs = append(createArgs, "--maintenance-policy=TERMINATE") 520 createArgs = append(createArgs, "--restart-on-failure") 521 for _, accelerator := range 
imageConfig.resources.Accelerators { 522 createArgs = append(createArgs, 523 fmt.Sprintf("--accelerator=count=%d,type=%s", accelerator.Count, accelerator.Type)) 524 } 525 } 526 if imageConfig.metadata != nil { 527 var itemArgs []string 528 var itemFileArgs []string 529 for _, item := range imageConfig.metadata.Items { 530 if strings.HasPrefix(item.Key, "user-") || strings.HasPrefix(item.Key, "startup-") || 531 strings.HasPrefix(item.Key, "containerd-") || strings.HasPrefix(item.Key, "cni-") || 532 strings.ContainsAny(item.Value, ",:") { 533 dataFile, err := os.CreateTemp("", "metadata") 534 if err != nil { 535 return "", fmt.Errorf("unable to create temp file %v", err) 536 } 537 defer os.Remove(dataFile.Name()) // clean up 538 if err = dataFile.Close(); err != nil { 539 return "", fmt.Errorf("unable to close temp file %w", err) 540 } 541 542 if err = os.WriteFile(dataFile.Name(), []byte(item.Value), 0666); err != nil { 543 return "", fmt.Errorf("could not write contents of metadata item into file %v", err) 544 } 545 itemFileArgs = append(itemFileArgs, item.Key+"="+dataFile.Name()) 546 } else { 547 itemArgs = append(itemArgs, item.Key+"="+item.Value) 548 } 549 } 550 if len(itemArgs) > 0 { 551 createArgs = append(createArgs, "--metadata="+strings.Join(itemArgs, ",")) 552 } 553 if len(itemFileArgs) > 0 { 554 createArgs = append(createArgs, "--metadata-from-file="+strings.Join(itemFileArgs, ",")) 555 } 556 } 557 558 if _, err := getGCEInstance(name); err != nil { 559 fmt.Printf("Running gcloud with parameters : %#v\n", createArgs) 560 _, err := runGCPCommandWithZone(createArgs...) 
561 if err != nil { 562 fmt.Println(err) 563 return "", fmt.Errorf("failed to create instance in project %q: %w", *project, err) 564 } 565 } 566 567 instanceRunning := false 568 var instance *gceInstance 569 for i := 0; i < 30 && !instanceRunning; i++ { 570 if i > 0 { 571 time.Sleep(time.Second * 20) 572 } 573 574 instance, err := getGCEInstance(name) 575 if err != nil { 576 continue 577 } 578 if strings.ToUpper(instance.Status) != "RUNNING" { 579 _ = fmt.Errorf("instance %s not in state RUNNING, was %s", name, instance.Status) 580 continue 581 } 582 externalIP := g.getExternalIP(instance) 583 if len(externalIP) > 0 { 584 remote.AddHostnameIP(name, externalIP) 585 } 586 587 var output string 588 output, err = remote.SSH(name, "sh", "-c", 589 "'systemctl list-units --type=service --state=running | grep -e containerd -e crio'") 590 if err != nil { 591 _ = fmt.Errorf("instance %s not running containerd/crio daemon - Command failed: %s", name, output) 592 continue 593 } 594 if !strings.Contains(output, "containerd.service") && 595 !strings.Contains(output, "crio.service") { 596 _ = fmt.Errorf("instance %s not running containerd/crio daemon: %s", name, output) 597 continue 598 } 599 instanceRunning = true 600 } 601 // If instance didn't reach running state in time, return with error now. 
602 if err != nil { 603 return name, err 604 } 605 // Instance reached running state in time, make sure that cloud-init is complete 606 if g.isCloudInitUsed(imageConfig.metadata) { 607 cloudInitFinished := false 608 for i := 0; i < 60 && !cloudInitFinished; i++ { 609 if i > 0 { 610 time.Sleep(time.Second * 20) 611 } 612 var finished string 613 finished, err = remote.SSH(name, "ls", "/var/lib/cloud/instance/boot-finished") 614 if err != nil { 615 err = fmt.Errorf("instance %s has not finished cloud-init script: %s", name, finished) 616 continue 617 } 618 cloudInitFinished = true 619 } 620 } 621 622 // apply additional kernel arguments to the instance 623 if len(imageConfig.kernelArguments) > 0 { 624 klog.Info("Update kernel arguments") 625 if err := g.updateKernelArguments(instance, imageConfig.image, imageConfig.kernelArguments); err != nil { 626 return name, err 627 } 628 } 629 630 return name, err 631 } 632 633 func (g *GCERunner) isCloudInitUsed(metadata *gceMetadata) bool { 634 if metadata == nil { 635 return false 636 } 637 for _, item := range metadata.Items { 638 if item.Key == "user-data" && item.Value != "" && strings.HasPrefix(item.Value, "#cloud-config") { 639 return true 640 } 641 } 642 return false 643 } 644 645 func (g *GCERunner) imageToInstanceName(imageConfig *internalGCEImage) string { 646 if imageConfig.machine == "" { 647 return g.cfg.InstanceNamePrefix + "-" + imageConfig.image 648 } 649 // For benchmark test, node name has the format 'machine-image-uuid' to run 650 // different machine types with the same image in parallel 651 name := imageConfig.machine + "-" + imageConfig.image + "-" + uuid.New().String()[:8] 652 // Sometimes the image is too long, we need instance names to have a max length of 63 653 if len(name) > 63 { 654 return name[:63] 655 } 656 return name 657 } 658 659 func (g *GCERunner) registerGceHostIP(host string) error { 660 instance, err := getGCEInstance(host) 661 if err != nil { 662 return err 663 } 664 if 
strings.ToUpper(instance.Status) != "RUNNING" { 665 return fmt.Errorf("instance %s not in state RUNNING, was %s", host, instance.Status) 666 } 667 externalIP := g.getExternalIP(instance) 668 if len(externalIP) > 0 { 669 remote.AddHostnameIP(host, externalIP) 670 } 671 return nil 672 } 673 674 func (g *GCERunner) getExternalIP(instance *gceInstance) string { 675 for i := range instance.NetworkInterfaces { 676 ni := instance.NetworkInterfaces[i] 677 for j := range ni.AccessConfigs { 678 ac := ni.AccessConfigs[j] 679 if len(ac.NatIP) > 0 { 680 return ac.NatIP 681 } 682 } 683 } 684 return "" 685 } 686 687 func (g *GCERunner) updateKernelArguments(instance *gceInstance, image string, kernelArgs []string) error { 688 kernelArgsString := strings.Join(kernelArgs, " ") 689 690 var cmd []string 691 if strings.Contains(image, "cos") { 692 cmd = []string{ 693 "dir=$(mktemp -d)", 694 "mount /dev/sda12 ${dir}", 695 fmt.Sprintf("sed -i -e \"s|cros_efi|cros_efi %s|g\" ${dir}/efi/boot/grub.cfg", kernelArgsString), 696 "umount ${dir}", 697 "rmdir ${dir}", 698 } 699 } 700 701 if strings.Contains(image, "ubuntu") { 702 cmd = []string{ 703 fmt.Sprintf("echo \"GRUB_CMDLINE_LINUX_DEFAULT=%s ${GRUB_CMDLINE_LINUX_DEFAULT}\" > /etc/default/grub.d/99-additional-arguments.cfg", kernelArgsString), 704 "/usr/sbin/update-grub", 705 } 706 } 707 708 if len(cmd) == 0 { 709 klog.Warningf("The image %s does not support adding an additional kernel arguments", image) 710 return nil 711 } 712 713 out, err := remote.SSH(instance.Name, "sh", "-c", fmt.Sprintf("'%s'", strings.Join(cmd, "&&"))) 714 if err != nil { 715 klog.Errorf("failed to run command %s: out: %s, Err: %v", cmd, out, err) 716 return err 717 } 718 719 if err := g.rebootInstance(instance); err != nil { 720 return err 721 } 722 723 return nil 724 } 725 726 func (g *GCERunner) machineType(machine string) string { 727 var ret string 728 if machine == "" && *instanceType != "" { 729 ret = *instanceType 730 } else if machine != "" { 731 ret = 
machine 732 } else { 733 ret = defaultGCEMachine 734 } 735 return ret 736 } 737 738 func (g *GCERunner) rebootInstance(instance *gceInstance) error { 739 // wait until the instance will not response to SSH 740 klog.Info("Reboot the node and wait for instance not to be available via SSH") 741 if waitErr := wait.PollImmediate(5*time.Second, 5*time.Minute, func() (bool, error) { 742 if _, err := remote.SSH(instance.Name, "reboot"); err != nil { 743 return true, nil 744 } 745 746 return false, nil 747 }); waitErr != nil { 748 return fmt.Errorf("the instance %s still response to SSH: %v", instance.Name, waitErr) 749 } 750 751 // wait until the instance will response again to SSH 752 klog.Info("Wait for instance to be available via SSH") 753 if waitErr := wait.PollImmediate(30*time.Second, 5*time.Minute, func() (bool, error) { 754 if _, err := remote.SSH(instance.Name, "sh", "-c", "date"); err != nil { 755 return false, nil 756 } 757 return true, nil 758 }); waitErr != nil { 759 return fmt.Errorf("the instance %s does not response to SSH: %v", instance.Name, waitErr) 760 } 761 762 return nil 763 }