github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/pkg/gce/gce.go (about)

     1  // Copyright 2016 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  // Package gce provides wrappers around Google Compute Engine (GCE) APIs.
     5  // It is assumed that the program itself also runs on GCE as APIs operate on the current project/zone.
     6  //
     7  // See https://cloud.google.com/compute/docs for details.
     8  // In particular, API reference:
     9  // https://cloud.google.com/compute/docs/reference/latest
    10  // and Go API wrappers:
    11  // https://godoc.org/google.golang.org/api/compute/v1
    12  package gce
    13  
    14  import (
    15  	"context"
    16  	"errors"
    17  	"fmt"
    18  	"io"
    19  	"math/rand"
    20  	"net/http"
    21  	"regexp"
    22  	"strings"
    23  	"time"
    24  
    25  	"github.com/google/syzkaller/sys/targets"
    26  	"golang.org/x/oauth2"
    27  	"golang.org/x/oauth2/google"
    28  	"google.golang.org/api/compute/v1"
    29  	"google.golang.org/api/googleapi"
    30  	"google.golang.org/api/option"
    31  )
    32  
// Context describes the GCE environment the program operates in and carries
// the compute API client used by its methods. All exported fields are filled
// in by NewContext.
type Context struct {
	ProjectID string
	// ZoneID is the zone API calls are issued against: the custom zone passed
	// to NewContext if non-empty, otherwise the zone of the current instance.
	ZoneID string
	// RegionID is derived from ZoneID.
	RegionID string
	// Instance is the name of the instance reported by the metadata server.
	Instance   string
	InternalIP string
	ExternalIP string
	// Network and Subnetwork are copied from the current instance's network
	// interface and reused for instances created via CreateInstance.
	Network    string
	Subnetwork string

	computeService *compute.Service

	// apiRateGate ticks regularly, preventing us from accidentally making
	// GCE API calls too quickly. Our quota is 20 QPS, but we limit ourselves
	// to less than that because several independent programs can do API calls.
	apiRateGate <-chan time.Time
}
    50  
// CreateArgs groups boolean instance-creation options. Its fields mirror the
// preemptible/displayDevice parameters of Context.CreateInstance; the type is
// not referenced anywhere else in this file.
type CreateArgs struct {
	Preemptible   bool
	DisplayDevice bool
}
    55  
    56  func NewContext(customZoneID string) (*Context, error) {
    57  	ctx := &Context{
    58  		apiRateGate: time.NewTicker(time.Second).C,
    59  	}
    60  	background := context.Background()
    61  	tokenSource, err := google.DefaultTokenSource(background, compute.CloudPlatformScope)
    62  	if err != nil {
    63  		return nil, fmt.Errorf("failed to get a token source: %w", err)
    64  	}
    65  	httpClient := oauth2.NewClient(background, tokenSource)
    66  	ctx.computeService, err = compute.NewService(background, option.WithHTTPClient(httpClient))
    67  	if err != nil {
    68  		return nil, fmt.Errorf("failed to create compute service: %w", err)
    69  	}
    70  	// Obtain project name, zone and current instance IP address.
    71  	ctx.ProjectID, err = ctx.getMeta("project/project-id")
    72  	if err != nil {
    73  		return nil, fmt.Errorf("failed to query gce project-id: %w", err)
    74  	}
    75  	myZoneID, err := ctx.getMeta("instance/zone")
    76  	if err != nil {
    77  		return nil, fmt.Errorf("failed to query gce zone: %w", err)
    78  	}
    79  	if i := strings.LastIndexByte(myZoneID, '/'); i != -1 {
    80  		myZoneID = myZoneID[i+1:] // the query returns some nonsense prefix
    81  	}
    82  	if customZoneID != "" {
    83  		ctx.ZoneID = customZoneID
    84  	} else {
    85  		ctx.ZoneID = myZoneID
    86  	}
    87  	if !validateZone(ctx.ZoneID) {
    88  		return nil, fmt.Errorf("%q is not a valid zone name", ctx.ZoneID)
    89  	}
    90  	ctx.RegionID = zoneToRegion(ctx.ZoneID)
    91  	if ctx.RegionID == "" {
    92  		return nil, fmt.Errorf("failed to extract region id from %s", ctx.ZoneID)
    93  	}
    94  	ctx.Instance, err = ctx.getMeta("instance/name")
    95  	if err != nil {
    96  		return nil, fmt.Errorf("failed to query gce instance name: %w", err)
    97  	}
    98  	inst, err := ctx.computeService.Instances.Get(ctx.ProjectID, myZoneID, ctx.Instance).Do()
    99  	if err != nil {
   100  		return nil, fmt.Errorf("error getting instance info: %w", err)
   101  	}
   102  	for _, iface := range inst.NetworkInterfaces {
   103  		if strings.HasPrefix(iface.NetworkIP, "10.") {
   104  			ctx.InternalIP = iface.NetworkIP
   105  		}
   106  		for _, ac := range iface.AccessConfigs {
   107  			if ac.NatIP != "" {
   108  				ctx.ExternalIP = ac.NatIP
   109  			}
   110  		}
   111  		ctx.Network = iface.Network
   112  		ctx.Subnetwork = iface.Subnetwork
   113  	}
   114  	if ctx.InternalIP == "" {
   115  		return nil, fmt.Errorf("failed to get current instance internal IP")
   116  	}
   117  	return ctx, nil
   118  }
   119  
// CreateInstance creates a new VM called name in ctx.ZoneID and waits for the
// insert operation to finish.
//
// machineType and image are resolved relative to the current project, sshkey
// is installed for the "syzkaller" user via the "ssh-keys" metadata item, and
// the instance is attached to the same network/subnetwork as recorded in ctx.
//
// If creating a preemptible instance fails because the zone resource pool is
// exhausted, creation is retried once as a non-preemptible instance.
//
// On success it returns the instance's internal IP address (the first
// interface address starting with "10.").
func (ctx *Context) CreateInstance(name, machineType, image, sshkey string,
	preemptible, displayDevice bool) (string, error) {
	prefix := "https://www.googleapis.com/compute/v1/projects/" + ctx.ProjectID
	sshkeyAttr := "syzkaller:" + sshkey
	// The API structs take pointers for these values, so they need to be
	// addressable variables.
	oneAttr := "1"
	falseAttr := false
	instance := &compute.Instance{
		Name:        name,
		Description: "syzkaller worker",
		MachineType: prefix + "/zones/" + ctx.ZoneID + "/machineTypes/" + machineType,
		Disks: []*compute.AttachedDisk{
			{
				AutoDelete: true,
				Boot:       true,
				Type:       "PERSISTENT",
				DiskSizeGb: int64(diskSizeGB(machineType)),
				InitializeParams: &compute.AttachedDiskInitializeParams{
					DiskName:    name,
					SourceImage: prefix + "/global/images/" + image,
				},
			},
		},
		Metadata: &compute.Metadata{
			Items: []*compute.MetadataItems{
				{
					Key:   "ssh-keys",
					Value: &sshkeyAttr,
				},
				{
					Key:   "serial-port-enable",
					Value: &oneAttr,
				},
			},
		},
		NetworkInterfaces: []*compute.NetworkInterface{
			{
				Network:    ctx.Network,
				Subnetwork: ctx.Subnetwork,
			},
		},
		Scheduling: &compute.Scheduling{
			AutomaticRestart:  &falseAttr,
			Preemptible:       preemptible,
			OnHostMaintenance: "TERMINATE",
		},
		DisplayDevice: &compute.DisplayDevice{
			EnableDisplay: displayDevice,
		},
	}
	// The retry label sits before the e2 check on purpose: when we fall back
	// from preemptible to non-preemptible, the maintenance policy has to be
	// re-evaluated for the new scheduling mode.
retry:
	if !instance.Scheduling.Preemptible && strings.HasPrefix(machineType, "e2-") {
		// Otherwise we get "Error 400: Efficient instances do not support
		// onHostMaintenance=TERMINATE unless they are preemptible".
		instance.Scheduling.OnHostMaintenance = "MIGRATE"
	}
	var op *compute.Operation
	err := ctx.apiCall(func() (err error) {
		op, err = ctx.computeService.Instances.Insert(ctx.ProjectID, ctx.ZoneID, instance).Do()
		return
	})
	if err != nil {
		return "", fmt.Errorf("failed to create instance: %w", err)
	}
	if err := ctx.waitForCompletion("zone", "create instance", op.Name, false); err != nil {
		var resourcePoolExhaustedError resourcePoolExhaustedError
		// Only fall back once: after Preemptible is cleared, a second
		// exhaustion error is returned to the caller.
		if errors.As(err, &resourcePoolExhaustedError) && instance.Scheduling.Preemptible {
			instance.Scheduling.Preemptible = false
			goto retry
		}
		return "", err
	}

	// Re-read the instance to learn the address it was assigned.
	var inst *compute.Instance
	err = ctx.apiCall(func() (err error) {
		inst, err = ctx.computeService.Instances.Get(ctx.ProjectID, ctx.ZoneID, name).Do()
		return
	})
	if err != nil {
		return "", fmt.Errorf("error getting instance %s details after creation: %w", name, err)
	}

	// Finds its internal IP.
	ip := ""
	for _, iface := range inst.NetworkInterfaces {
		if strings.HasPrefix(iface.NetworkIP, "10.") {
			ip = iface.NetworkIP
			break
		}
	}
	if ip == "" {
		return "", fmt.Errorf("didn't find instance internal IP address")
	}
	return ip, nil
}
   214  
   215  func diskSizeGB(machineType string) int {
   216  	if strings.HasPrefix(machineType, "c4a-") {
   217  		// For C4A machines, the only available disk type is "Hyperdisk Balanced",
   218  		// which must be >= 10GB.
   219  		return 10
   220  	}
   221  	// Use the default value.
   222  	return 0
   223  }
   224  
   225  func (ctx *Context) DeleteInstance(name string, wait bool) error {
   226  	var op *compute.Operation
   227  	err := ctx.apiCall(func() (err error) {
   228  		op, err = ctx.computeService.Instances.Delete(ctx.ProjectID, ctx.ZoneID, name).Do()
   229  		return
   230  	})
   231  	var apiErr *googleapi.Error
   232  	if errors.As(err, &apiErr) && apiErr.Code == 404 {
   233  		return nil
   234  	}
   235  	if err != nil {
   236  		return fmt.Errorf("failed to delete instance: %w", err)
   237  	}
   238  	if wait {
   239  		if err := ctx.waitForCompletion("zone", "delete image", op.Name, true); err != nil {
   240  			return err
   241  		}
   242  	}
   243  	return nil
   244  }
   245  
   246  func (ctx *Context) IsInstanceRunning(name string) bool {
   247  	var inst *compute.Instance
   248  	err := ctx.apiCall(func() (err error) {
   249  		inst, err = ctx.computeService.Instances.Get(ctx.ProjectID, ctx.ZoneID, name).Do()
   250  		return
   251  	})
   252  	if err != nil {
   253  		return false
   254  	}
   255  	return inst.Status == "RUNNING"
   256  }
   257  
   258  func (ctx *Context) CreateImage(imageName, gcsFile, OS string) error {
   259  	var features []*compute.GuestOsFeature
   260  	if OS == targets.Linux {
   261  		features = []*compute.GuestOsFeature{
   262  			{
   263  				Type: "GVNIC",
   264  			},
   265  		}
   266  	}
   267  	image := &compute.Image{
   268  		Name: imageName,
   269  		RawDisk: &compute.ImageRawDisk{
   270  			Source: "https://storage.googleapis.com/" + gcsFile,
   271  		},
   272  		Licenses: []string{
   273  			"https://www.googleapis.com/compute/v1/projects/vm-options/global/licenses/enable-vmx",
   274  		},
   275  		GuestOsFeatures: features,
   276  	}
   277  	var op *compute.Operation
   278  	err := ctx.apiCall(func() (err error) {
   279  		op, err = ctx.computeService.Images.Insert(ctx.ProjectID, image).Do()
   280  		return
   281  	})
   282  	if err != nil {
   283  		// Try again without the vmx license in case it is not supported.
   284  		image.Licenses = nil
   285  		err := ctx.apiCall(func() (err error) {
   286  			op, err = ctx.computeService.Images.Insert(ctx.ProjectID, image).Do()
   287  			return
   288  		})
   289  		if err != nil {
   290  			return fmt.Errorf("failed to create image: %w", err)
   291  		}
   292  	}
   293  	if err := ctx.waitForCompletion("global", "create image", op.Name, false); err != nil {
   294  		return err
   295  	}
   296  	return nil
   297  }
   298  
   299  func (ctx *Context) DeleteImage(imageName string) error {
   300  	var op *compute.Operation
   301  	err := ctx.apiCall(func() (err error) {
   302  		op, err = ctx.computeService.Images.Delete(ctx.ProjectID, imageName).Do()
   303  		return
   304  	})
   305  	var apiErr *googleapi.Error
   306  	if errors.As(err, &apiErr) && apiErr.Code == 404 {
   307  		return nil
   308  	}
   309  	if err != nil {
   310  		return fmt.Errorf("failed to delete image: %w", err)
   311  	}
   312  	if err := ctx.waitForCompletion("global", "delete image", op.Name, true); err != nil {
   313  		return err
   314  	}
   315  	return nil
   316  }
   317  
   318  type resourcePoolExhaustedError string
   319  
   320  func (err resourcePoolExhaustedError) Error() string {
   321  	return string(err)
   322  }
   323  
   324  func (ctx *Context) waitForCompletion(typ, desc, opName string, ignoreNotFound bool) error {
   325  	time.Sleep(3 * time.Second)
   326  	for {
   327  		time.Sleep(3 * time.Second)
   328  		var op *compute.Operation
   329  		err := ctx.apiCall(func() (err error) {
   330  			switch typ {
   331  			case "global":
   332  				op, err = ctx.computeService.GlobalOperations.Get(ctx.ProjectID, opName).Do()
   333  			case "zone":
   334  				op, err = ctx.computeService.ZoneOperations.Get(ctx.ProjectID, ctx.ZoneID, opName).Do()
   335  			default:
   336  				panic("unknown operation type: " + typ)
   337  			}
   338  			return
   339  		})
   340  		if err != nil {
   341  			return fmt.Errorf("failed to get %v operation %v: %w", desc, opName, err)
   342  		}
   343  		switch op.Status {
   344  		case "PENDING", "RUNNING":
   345  			continue
   346  		case "DONE":
   347  			if op.Error != nil {
   348  				reason := ""
   349  				for _, operr := range op.Error.Errors {
   350  					if operr.Code == "ZONE_RESOURCE_POOL_EXHAUSTED" ||
   351  						operr.Code == "ZONE_RESOURCE_POOL_EXHAUSTED_WITH_DETAILS" {
   352  						return resourcePoolExhaustedError(fmt.Sprintf("%+v", operr))
   353  					}
   354  					if ignoreNotFound && operr.Code == "RESOURCE_NOT_FOUND" {
   355  						return nil
   356  					}
   357  					reason += fmt.Sprintf("%+v.", operr)
   358  				}
   359  				return fmt.Errorf("%v operation failed: %v", desc, reason)
   360  			}
   361  			return nil
   362  		default:
   363  			return fmt.Errorf("unknown %v operation status %q: %+v", desc, op.Status, op)
   364  		}
   365  	}
   366  }
   367  
   368  func (ctx *Context) getMeta(path string) (string, error) {
   369  	req, err := http.NewRequest("GET", "http://metadata.google.internal/computeMetadata/v1/"+path, nil)
   370  	if err != nil {
   371  		return "", err
   372  	}
   373  	req.Header.Add("Metadata-Flavor", "Google")
   374  	resp, err := http.DefaultClient.Do(req)
   375  	if err != nil {
   376  		return "", err
   377  	}
   378  	defer resp.Body.Close()
   379  	body, err := io.ReadAll(resp.Body)
   380  	if err != nil {
   381  		return "", err
   382  	}
   383  	return string(body), nil
   384  }
   385  
   386  func (ctx *Context) apiCall(fn func() error) error {
   387  	rateLimited := 0
   388  	for {
   389  		<-ctx.apiRateGate
   390  		err := fn()
   391  		if err != nil {
   392  			if strings.Contains(err.Error(), "Rate Limit Exceeded") ||
   393  				strings.Contains(err.Error(), "rateLimitExceeded") {
   394  				rateLimited++
   395  				backoff := time.Duration(float64(rateLimited) * 1e9 * (rand.Float64() + 1))
   396  				time.Sleep(backoff)
   397  				if rateLimited < 20 {
   398  					continue
   399  				}
   400  			}
   401  		}
   402  		return err
   403  	}
   404  }
   405  
   406  var zoneNameRe = regexp.MustCompile("^[a-zA-Z0-9]*-[a-zA-Z0-9]*[-][a-zA-Z0-9]*$")
   407  
   408  func validateZone(zone string) bool {
   409  	return zoneNameRe.MatchString(zone)
   410  }
   411  
   412  var regionNameRe = regexp.MustCompile("^[a-zA-Z0-9]*-[a-zA-Z0-9]*")
   413  
   414  func zoneToRegion(zone string) string {
   415  	return regionNameRe.FindString(zone)
   416  }