github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/pkg/gce/gce.go (about)

     1  // Copyright 2016 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  // Package gce provides wrappers around Google Compute Engine (GCE) APIs.
     5  // It is assumed that the program itself also runs on GCE as APIs operate on the current project/zone.
     6  //
     7  // See https://cloud.google.com/compute/docs for details.
     8  // In particular, API reference:
     9  // https://cloud.google.com/compute/docs/reference/latest
    10  // and Go API wrappers:
    11  // https://godoc.org/google.golang.org/api/compute/v1
    12  package gce
    13  
    14  import (
    15  	"context"
    16  	"errors"
    17  	"fmt"
    18  	"io"
    19  	"math/rand"
    20  	"net/http"
    21  	"regexp"
    22  	"strings"
    23  	"time"
    24  
    25  	"golang.org/x/oauth2"
    26  	"golang.org/x/oauth2/google"
    27  	"google.golang.org/api/compute/v1"
    28  	"google.golang.org/api/googleapi"
    29  )
    30  
// Context describes the GCE environment the program runs in and carries the
// compute API client used by the methods of this package. NewContext fills it:
// ProjectID and Instance come from the metadata server, ZoneID is either the
// custom zone passed to NewContext or the zone of the current instance,
// RegionID is derived from ZoneID, and the IP/Network/Subnetwork fields are
// copied from the current instance's network interfaces.
type Context struct {
	ProjectID  string
	ZoneID     string
	RegionID   string
	Instance   string
	InternalIP string
	ExternalIP string
	Network    string
	Subnetwork string

	computeService *compute.Service

	// apiRateGate delivers ticks at a regular interval; apiCall waits for a tick
	// before every request, preventing us from accidentally making GCE API
	// calls too quickly. Our quota is 20 QPS, but we limit ourselves to less
	// than that because several independent programs can do API calls.
	apiRateGate <-chan time.Time
}
    48  
// CreateArgs groups optional instance creation settings.
// NOTE(review): this type is not referenced anywhere in this file; the field
// names mirror the preemptible and displayDevice parameters of
// CreateInstance — confirm the intended use with callers.
type CreateArgs struct {
	Preemptible   bool
	DisplayDevice bool
}
    53  
    54  func NewContext(customZoneID string) (*Context, error) {
    55  	ctx := &Context{
    56  		apiRateGate: time.NewTicker(time.Second).C,
    57  	}
    58  	background := context.Background()
    59  	tokenSource, err := google.DefaultTokenSource(background, compute.CloudPlatformScope)
    60  	if err != nil {
    61  		return nil, fmt.Errorf("failed to get a token source: %w", err)
    62  	}
    63  	httpClient := oauth2.NewClient(background, tokenSource)
    64  	// nolint
    65  	// compute.New is deprecated: please use NewService instead.
    66  	// To provide a custom HTTP client, use option.WithHTTPClient.
    67  	// If you are using google.golang.org/api/googleapis/transport.APIKey,
    68  	// use option.WithAPIKey with NewService instead.
    69  	ctx.computeService, _ = compute.New(httpClient)
    70  	// Obtain project name, zone and current instance IP address.
    71  	ctx.ProjectID, err = ctx.getMeta("project/project-id")
    72  	if err != nil {
    73  		return nil, fmt.Errorf("failed to query gce project-id: %w", err)
    74  	}
    75  	myZoneID, err := ctx.getMeta("instance/zone")
    76  	if err != nil {
    77  		return nil, fmt.Errorf("failed to query gce zone: %w", err)
    78  	}
    79  	if i := strings.LastIndexByte(myZoneID, '/'); i != -1 {
    80  		myZoneID = myZoneID[i+1:] // the query returns some nonsense prefix
    81  	}
    82  	if customZoneID != "" {
    83  		ctx.ZoneID = customZoneID
    84  	} else {
    85  		ctx.ZoneID = myZoneID
    86  	}
    87  	if !validateZone(ctx.ZoneID) {
    88  		return nil, fmt.Errorf("%q is not a valid zone name", ctx.ZoneID)
    89  	}
    90  	ctx.RegionID = zoneToRegion(ctx.ZoneID)
    91  	if ctx.RegionID == "" {
    92  		return nil, fmt.Errorf("failed to extract region id from %s", ctx.ZoneID)
    93  	}
    94  	ctx.Instance, err = ctx.getMeta("instance/name")
    95  	if err != nil {
    96  		return nil, fmt.Errorf("failed to query gce instance name: %w", err)
    97  	}
    98  	inst, err := ctx.computeService.Instances.Get(ctx.ProjectID, myZoneID, ctx.Instance).Do()
    99  	if err != nil {
   100  		return nil, fmt.Errorf("error getting instance info: %w", err)
   101  	}
   102  	for _, iface := range inst.NetworkInterfaces {
   103  		if strings.HasPrefix(iface.NetworkIP, "10.") {
   104  			ctx.InternalIP = iface.NetworkIP
   105  		}
   106  		for _, ac := range iface.AccessConfigs {
   107  			if ac.NatIP != "" {
   108  				ctx.ExternalIP = ac.NatIP
   109  			}
   110  		}
   111  		ctx.Network = iface.Network
   112  		ctx.Subnetwork = iface.Subnetwork
   113  	}
   114  	if ctx.InternalIP == "" {
   115  		return nil, fmt.Errorf("failed to get current instance internal IP")
   116  	}
   117  	return ctx, nil
   118  }
   119  
   120  func (ctx *Context) CreateInstance(name, machineType, image, sshkey string,
   121  	preemptible, displayDevice bool) (string, error) {
   122  	prefix := "https://www.googleapis.com/compute/v1/projects/" + ctx.ProjectID
   123  	sshkeyAttr := "syzkaller:" + sshkey
   124  	oneAttr := "1"
   125  	falseAttr := false
   126  	instance := &compute.Instance{
   127  		Name:        name,
   128  		Description: "syzkaller worker",
   129  		MachineType: prefix + "/zones/" + ctx.ZoneID + "/machineTypes/" + machineType,
   130  		Disks: []*compute.AttachedDisk{
   131  			{
   132  				AutoDelete: true,
   133  				Boot:       true,
   134  				Type:       "PERSISTENT",
   135  				InitializeParams: &compute.AttachedDiskInitializeParams{
   136  					DiskName:    name,
   137  					SourceImage: prefix + "/global/images/" + image,
   138  				},
   139  			},
   140  		},
   141  		Metadata: &compute.Metadata{
   142  			Items: []*compute.MetadataItems{
   143  				{
   144  					Key:   "ssh-keys",
   145  					Value: &sshkeyAttr,
   146  				},
   147  				{
   148  					Key:   "serial-port-enable",
   149  					Value: &oneAttr,
   150  				},
   151  			},
   152  		},
   153  		NetworkInterfaces: []*compute.NetworkInterface{
   154  			{
   155  				Network:    ctx.Network,
   156  				Subnetwork: ctx.Subnetwork,
   157  			},
   158  		},
   159  		Scheduling: &compute.Scheduling{
   160  			AutomaticRestart:  &falseAttr,
   161  			Preemptible:       preemptible,
   162  			OnHostMaintenance: "TERMINATE",
   163  		},
   164  		DisplayDevice: &compute.DisplayDevice{
   165  			EnableDisplay: displayDevice,
   166  		},
   167  	}
   168  retry:
   169  	if !instance.Scheduling.Preemptible && strings.HasPrefix(machineType, "e2-") {
   170  		// Otherwise we get "Error 400: Efficient instances do not support
   171  		// onHostMaintenance=TERMINATE unless they are preemptible".
   172  		instance.Scheduling.OnHostMaintenance = "MIGRATE"
   173  	}
   174  	var op *compute.Operation
   175  	err := ctx.apiCall(func() (err error) {
   176  		op, err = ctx.computeService.Instances.Insert(ctx.ProjectID, ctx.ZoneID, instance).Do()
   177  		return
   178  	})
   179  	if err != nil {
   180  		return "", fmt.Errorf("failed to create instance: %w", err)
   181  	}
   182  	if err := ctx.waitForCompletion("zone", "create instance", op.Name, false); err != nil {
   183  		var resourcePoolExhaustedError resourcePoolExhaustedError
   184  		if errors.As(err, &resourcePoolExhaustedError) && instance.Scheduling.Preemptible {
   185  			instance.Scheduling.Preemptible = false
   186  			goto retry
   187  		}
   188  		return "", err
   189  	}
   190  
   191  	var inst *compute.Instance
   192  	err = ctx.apiCall(func() (err error) {
   193  		inst, err = ctx.computeService.Instances.Get(ctx.ProjectID, ctx.ZoneID, name).Do()
   194  		return
   195  	})
   196  	if err != nil {
   197  		return "", fmt.Errorf("error getting instance %s details after creation: %w", name, err)
   198  	}
   199  
   200  	// Finds its internal IP.
   201  	ip := ""
   202  	for _, iface := range inst.NetworkInterfaces {
   203  		if strings.HasPrefix(iface.NetworkIP, "10.") {
   204  			ip = iface.NetworkIP
   205  			break
   206  		}
   207  	}
   208  	if ip == "" {
   209  		return "", fmt.Errorf("didn't find instance internal IP address")
   210  	}
   211  	return ip, nil
   212  }
   213  
   214  func (ctx *Context) DeleteInstance(name string, wait bool) error {
   215  	var op *compute.Operation
   216  	err := ctx.apiCall(func() (err error) {
   217  		op, err = ctx.computeService.Instances.Delete(ctx.ProjectID, ctx.ZoneID, name).Do()
   218  		return
   219  	})
   220  	var apiErr *googleapi.Error
   221  	if errors.As(err, &apiErr) && apiErr.Code == 404 {
   222  		return nil
   223  	}
   224  	if err != nil {
   225  		return fmt.Errorf("failed to delete instance: %w", err)
   226  	}
   227  	if wait {
   228  		if err := ctx.waitForCompletion("zone", "delete image", op.Name, true); err != nil {
   229  			return err
   230  		}
   231  	}
   232  	return nil
   233  }
   234  
   235  func (ctx *Context) IsInstanceRunning(name string) bool {
   236  	var inst *compute.Instance
   237  	err := ctx.apiCall(func() (err error) {
   238  		inst, err = ctx.computeService.Instances.Get(ctx.ProjectID, ctx.ZoneID, name).Do()
   239  		return
   240  	})
   241  	if err != nil {
   242  		return false
   243  	}
   244  	return inst.Status == "RUNNING"
   245  }
   246  
   247  func (ctx *Context) CreateImage(imageName, gcsFile string) error {
   248  	image := &compute.Image{
   249  		Name: imageName,
   250  		RawDisk: &compute.ImageRawDisk{
   251  			Source: "https://storage.googleapis.com/" + gcsFile,
   252  		},
   253  		Licenses: []string{
   254  			"https://www.googleapis.com/compute/v1/projects/vm-options/global/licenses/enable-vmx",
   255  		},
   256  	}
   257  	var op *compute.Operation
   258  	err := ctx.apiCall(func() (err error) {
   259  		op, err = ctx.computeService.Images.Insert(ctx.ProjectID, image).Do()
   260  		return
   261  	})
   262  	if err != nil {
   263  		// Try again without the vmx license in case it is not supported.
   264  		image.Licenses = nil
   265  		err := ctx.apiCall(func() (err error) {
   266  			op, err = ctx.computeService.Images.Insert(ctx.ProjectID, image).Do()
   267  			return
   268  		})
   269  		if err != nil {
   270  			return fmt.Errorf("failed to create image: %w", err)
   271  		}
   272  	}
   273  	if err := ctx.waitForCompletion("global", "create image", op.Name, false); err != nil {
   274  		return err
   275  	}
   276  	return nil
   277  }
   278  
   279  func (ctx *Context) DeleteImage(imageName string) error {
   280  	var op *compute.Operation
   281  	err := ctx.apiCall(func() (err error) {
   282  		op, err = ctx.computeService.Images.Delete(ctx.ProjectID, imageName).Do()
   283  		return
   284  	})
   285  	var apiErr *googleapi.Error
   286  	if errors.As(err, &apiErr) && apiErr.Code == 404 {
   287  		return nil
   288  	}
   289  	if err != nil {
   290  		return fmt.Errorf("failed to delete image: %w", err)
   291  	}
   292  	if err := ctx.waitForCompletion("global", "delete image", op.Name, true); err != nil {
   293  		return err
   294  	}
   295  	return nil
   296  }
   297  
   298  type resourcePoolExhaustedError string
   299  
   300  func (err resourcePoolExhaustedError) Error() string {
   301  	return string(err)
   302  }
   303  
   304  func (ctx *Context) waitForCompletion(typ, desc, opName string, ignoreNotFound bool) error {
   305  	time.Sleep(3 * time.Second)
   306  	for {
   307  		time.Sleep(3 * time.Second)
   308  		var op *compute.Operation
   309  		err := ctx.apiCall(func() (err error) {
   310  			switch typ {
   311  			case "global":
   312  				op, err = ctx.computeService.GlobalOperations.Get(ctx.ProjectID, opName).Do()
   313  			case "zone":
   314  				op, err = ctx.computeService.ZoneOperations.Get(ctx.ProjectID, ctx.ZoneID, opName).Do()
   315  			default:
   316  				panic("unknown operation type: " + typ)
   317  			}
   318  			return
   319  		})
   320  		if err != nil {
   321  			return fmt.Errorf("failed to get %v operation %v: %w", desc, opName, err)
   322  		}
   323  		switch op.Status {
   324  		case "PENDING", "RUNNING":
   325  			continue
   326  		case "DONE":
   327  			if op.Error != nil {
   328  				reason := ""
   329  				for _, operr := range op.Error.Errors {
   330  					if operr.Code == "ZONE_RESOURCE_POOL_EXHAUSTED" ||
   331  						operr.Code == "ZONE_RESOURCE_POOL_EXHAUSTED_WITH_DETAILS" {
   332  						return resourcePoolExhaustedError(fmt.Sprintf("%+v", operr))
   333  					}
   334  					if ignoreNotFound && operr.Code == "RESOURCE_NOT_FOUND" {
   335  						return nil
   336  					}
   337  					reason += fmt.Sprintf("%+v.", operr)
   338  				}
   339  				return fmt.Errorf("%v operation failed: %v", desc, reason)
   340  			}
   341  			return nil
   342  		default:
   343  			return fmt.Errorf("unknown %v operation status %q: %+v", desc, op.Status, op)
   344  		}
   345  	}
   346  }
   347  
   348  func (ctx *Context) getMeta(path string) (string, error) {
   349  	req, err := http.NewRequest("GET", "http://metadata.google.internal/computeMetadata/v1/"+path, nil)
   350  	if err != nil {
   351  		return "", err
   352  	}
   353  	req.Header.Add("Metadata-Flavor", "Google")
   354  	resp, err := http.DefaultClient.Do(req)
   355  	if err != nil {
   356  		return "", err
   357  	}
   358  	defer resp.Body.Close()
   359  	body, err := io.ReadAll(resp.Body)
   360  	if err != nil {
   361  		return "", err
   362  	}
   363  	return string(body), nil
   364  }
   365  
   366  func (ctx *Context) apiCall(fn func() error) error {
   367  	rateLimited := 0
   368  	for {
   369  		<-ctx.apiRateGate
   370  		err := fn()
   371  		if err != nil {
   372  			if strings.Contains(err.Error(), "Rate Limit Exceeded") ||
   373  				strings.Contains(err.Error(), "rateLimitExceeded") {
   374  				rateLimited++
   375  				backoff := time.Duration(float64(rateLimited) * 1e9 * (rand.Float64() + 1))
   376  				time.Sleep(backoff)
   377  				if rateLimited < 20 {
   378  					continue
   379  				}
   380  			}
   381  		}
   382  		return err
   383  	}
   384  }
   385  
   386  var zoneNameRe = regexp.MustCompile("^[a-zA-Z0-9]*-[a-zA-Z0-9]*[-][a-zA-Z0-9]*$")
   387  
   388  func validateZone(zone string) bool {
   389  	return zoneNameRe.MatchString(zone)
   390  }
   391  
   392  var regionNameRe = regexp.MustCompile("^[a-zA-Z0-9]*-[a-zA-Z0-9]*")
   393  
   394  func zoneToRegion(zone string) string {
   395  	return regionNameRe.FindString(zone)
   396  }