github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/pkg/gce/gce.go (about) 1 // Copyright 2016 syzkaller project authors. All rights reserved. 2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4 // Package gce provides wrappers around Google Compute Engine (GCE) APIs. 5 // It is assumed that the program itself also runs on GCE as APIs operate on the current project/zone. 6 // 7 // See https://cloud.google.com/compute/docs for details. 8 // In particular, API reference: 9 // https://cloud.google.com/compute/docs/reference/latest 10 // and Go API wrappers: 11 // https://godoc.org/google.golang.org/api/compute/v1 12 package gce 13 14 import ( 15 "context" 16 "errors" 17 "fmt" 18 "io" 19 "math/rand" 20 "net/http" 21 "regexp" 22 "strings" 23 "time" 24 25 "github.com/google/syzkaller/sys/targets" 26 "golang.org/x/oauth2" 27 "golang.org/x/oauth2/google" 28 "google.golang.org/api/compute/v1" 29 "google.golang.org/api/googleapi" 30 "google.golang.org/api/option" 31 ) 32 33 type Context struct { 34 ProjectID string 35 ZoneID string 36 RegionID string 37 Instance string 38 InternalIP string 39 ExternalIP string 40 Network string 41 Subnetwork string 42 43 computeService *compute.Service 44 45 // apiCallTicker ticks regularly, preventing us from accidentally making 46 // GCE API calls too quickly. Our quota is 20 QPS, but we limit ourselves 47 // to less than that because several independent programs can do API calls. 48 apiRateGate <-chan time.Time 49 } 50 51 type CreateArgs struct { 52 Preemptible bool 53 DisplayDevice bool 54 } 55 56 func NewContext(customZoneID string) (*Context, error) { 57 ctx := &Context{ 58 apiRateGate: time.NewTicker(time.Second).C, 59 } 60 background := context.Background() 61 tokenSource, err := google.DefaultTokenSource(background, compute.CloudPlatformScope) 62 if err != nil { 63 return nil, fmt.Errorf("failed to get a token source: %w", err) 64 } 65 httpClient := oauth2.NewClient(background, tokenSource) 66 ctx.computeService, err = compute.NewService(background, option.WithHTTPClient(httpClient)) 67 if err != nil { 68 return nil, fmt.Errorf("failed to create compute service: %w", err) 69 } 70 // Obtain project name, zone and current instance IP address. 71 ctx.ProjectID, err = ctx.getMeta("project/project-id") 72 if err != nil { 73 return nil, fmt.Errorf("failed to query gce project-id: %w", err) 74 } 75 myZoneID, err := ctx.getMeta("instance/zone") 76 if err != nil { 77 return nil, fmt.Errorf("failed to query gce zone: %w", err) 78 } 79 if i := strings.LastIndexByte(myZoneID, '/'); i != -1 { 80 myZoneID = myZoneID[i+1:] // the query returns some nonsense prefix 81 } 82 if customZoneID != "" { 83 ctx.ZoneID = customZoneID 84 } else { 85 ctx.ZoneID = myZoneID 86 } 87 if !validateZone(ctx.ZoneID) { 88 return nil, fmt.Errorf("%q is not a valid zone name", ctx.ZoneID) 89 } 90 ctx.RegionID = zoneToRegion(ctx.ZoneID) 91 if ctx.RegionID == "" { 92 return nil, fmt.Errorf("failed to extract region id from %s", ctx.ZoneID) 93 } 94 ctx.Instance, err = ctx.getMeta("instance/name") 95 if err != nil { 96 return nil, fmt.Errorf("failed to query gce instance name: %w", err) 97 } 98 inst, err := ctx.computeService.Instances.Get(ctx.ProjectID, myZoneID, ctx.Instance).Do() 99 if err != nil { 100 return nil, fmt.Errorf("error getting instance info: %w", err) 101 } 102 for _, iface := range inst.NetworkInterfaces { 103 if strings.HasPrefix(iface.NetworkIP, "10.") { 104 ctx.InternalIP = iface.NetworkIP 105 } 106 for _, ac := range iface.AccessConfigs { 107 if ac.NatIP != "" { 108 ctx.ExternalIP = ac.NatIP 109 } 110 } 111 ctx.Network = iface.Network 112 ctx.Subnetwork = iface.Subnetwork 113 } 114 if ctx.InternalIP == "" { 115 return nil, fmt.Errorf("failed to get current instance internal IP") 116 } 117 return ctx, nil 118 } 119 120 func (ctx *Context) CreateInstance(name, machineType, image, sshkey string, 121 preemptible, displayDevice bool) (string, error) { 122 prefix := "https://www.googleapis.com/compute/v1/projects/" + ctx.ProjectID 123 sshkeyAttr := "syzkaller:" + sshkey 124 oneAttr := "1" 125 falseAttr := false 126 instance := &compute.Instance{ 127 Name: name, 128 Description: "syzkaller worker", 129 MachineType: prefix + "/zones/" + ctx.ZoneID + "/machineTypes/" + machineType, 130 Disks: []*compute.AttachedDisk{ 131 { 132 AutoDelete: true, 133 Boot: true, 134 Type: "PERSISTENT", 135 DiskSizeGb: int64(diskSizeGB(machineType)), 136 InitializeParams: &compute.AttachedDiskInitializeParams{ 137 DiskName: name, 138 SourceImage: prefix + "/global/images/" + image, 139 }, 140 }, 141 }, 142 Metadata: &compute.Metadata{ 143 Items: []*compute.MetadataItems{ 144 { 145 Key: "ssh-keys", 146 Value: &sshkeyAttr, 147 }, 148 { 149 Key: "serial-port-enable", 150 Value: &oneAttr, 151 }, 152 }, 153 }, 154 NetworkInterfaces: []*compute.NetworkInterface{ 155 { 156 Network: ctx.Network, 157 Subnetwork: ctx.Subnetwork, 158 }, 159 }, 160 Scheduling: &compute.Scheduling{ 161 AutomaticRestart: &falseAttr, 162 Preemptible: preemptible, 163 OnHostMaintenance: "TERMINATE", 164 }, 165 DisplayDevice: &compute.DisplayDevice{ 166 EnableDisplay: displayDevice, 167 }, 168 } 169 retry: 170 if !instance.Scheduling.Preemptible && strings.HasPrefix(machineType, "e2-") { 171 // Otherwise we get "Error 400: Efficient instances do not support 172 // onHostMaintenance=TERMINATE unless they are preemptible". 173 instance.Scheduling.OnHostMaintenance = "MIGRATE" 174 } 175 var op *compute.Operation 176 err := ctx.apiCall(func() (err error) { 177 op, err = ctx.computeService.Instances.Insert(ctx.ProjectID, ctx.ZoneID, instance).Do() 178 return 179 }) 180 if err != nil { 181 return "", fmt.Errorf("failed to create instance: %w", err) 182 } 183 if err := ctx.waitForCompletion("zone", "create instance", op.Name, false); err != nil { 184 var resourcePoolExhaustedError resourcePoolExhaustedError 185 if errors.As(err, &resourcePoolExhaustedError) && instance.Scheduling.Preemptible { 186 instance.Scheduling.Preemptible = false 187 goto retry 188 } 189 return "", err 190 } 191 192 var inst *compute.Instance 193 err = ctx.apiCall(func() (err error) { 194 inst, err = ctx.computeService.Instances.Get(ctx.ProjectID, ctx.ZoneID, name).Do() 195 return 196 }) 197 if err != nil { 198 return "", fmt.Errorf("error getting instance %s details after creation: %w", name, err) 199 } 200 201 // Finds its internal IP. 202 ip := "" 203 for _, iface := range inst.NetworkInterfaces { 204 if strings.HasPrefix(iface.NetworkIP, "10.") { 205 ip = iface.NetworkIP 206 break 207 } 208 } 209 if ip == "" { 210 return "", fmt.Errorf("didn't find instance internal IP address") 211 } 212 return ip, nil 213 } 214 215 func diskSizeGB(machineType string) int { 216 if strings.HasPrefix(machineType, "c4a-") { 217 // For C4A machines, the only available disk type is "Hyperdisk Balanced", 218 // which must be >= 10GB. 219 return 10 220 } 221 // Use the default value. 222 return 0 223 } 224 225 func (ctx *Context) DeleteInstance(name string, wait bool) error { 226 var op *compute.Operation 227 err := ctx.apiCall(func() (err error) { 228 op, err = ctx.computeService.Instances.Delete(ctx.ProjectID, ctx.ZoneID, name).Do() 229 return 230 }) 231 var apiErr *googleapi.Error 232 if errors.As(err, &apiErr) && apiErr.Code == 404 { 233 return nil 234 } 235 if err != nil { 236 return fmt.Errorf("failed to delete instance: %w", err) 237 } 238 if wait { 239 if err := ctx.waitForCompletion("zone", "delete image", op.Name, true); err != nil { 240 return err 241 } 242 } 243 return nil 244 } 245 246 func (ctx *Context) IsInstanceRunning(name string) bool { 247 var inst *compute.Instance 248 err := ctx.apiCall(func() (err error) { 249 inst, err = ctx.computeService.Instances.Get(ctx.ProjectID, ctx.ZoneID, name).Do() 250 return 251 }) 252 if err != nil { 253 return false 254 } 255 return inst.Status == "RUNNING" 256 } 257 258 func (ctx *Context) CreateImage(imageName, gcsFile, OS string) error { 259 var features []*compute.GuestOsFeature 260 if OS == targets.Linux { 261 features = []*compute.GuestOsFeature{ 262 { 263 Type: "GVNIC", 264 }, 265 } 266 } 267 image := &compute.Image{ 268 Name: imageName, 269 RawDisk: &compute.ImageRawDisk{ 270 Source: "https://storage.googleapis.com/" + gcsFile, 271 }, 272 Licenses: []string{ 273 "https://www.googleapis.com/compute/v1/projects/vm-options/global/licenses/enable-vmx", 274 }, 275 GuestOsFeatures: features, 276 } 277 var op *compute.Operation 278 err := ctx.apiCall(func() (err error) { 279 op, err = ctx.computeService.Images.Insert(ctx.ProjectID, image).Do() 280 return 281 }) 282 if err != nil { 283 // Try again without the vmx license in case it is not supported. 284 image.Licenses = nil 285 err := ctx.apiCall(func() (err error) { 286 op, err = ctx.computeService.Images.Insert(ctx.ProjectID, image).Do() 287 return 288 }) 289 if err != nil { 290 return fmt.Errorf("failed to create image: %w", err) 291 } 292 } 293 if err := ctx.waitForCompletion("global", "create image", op.Name, false); err != nil { 294 return err 295 } 296 return nil 297 } 298 299 func (ctx *Context) DeleteImage(imageName string) error { 300 var op *compute.Operation 301 err := ctx.apiCall(func() (err error) { 302 op, err = ctx.computeService.Images.Delete(ctx.ProjectID, imageName).Do() 303 return 304 }) 305 var apiErr *googleapi.Error 306 if errors.As(err, &apiErr) && apiErr.Code == 404 { 307 return nil 308 } 309 if err != nil { 310 return fmt.Errorf("failed to delete image: %w", err) 311 } 312 if err := ctx.waitForCompletion("global", "delete image", op.Name, true); err != nil { 313 return err 314 } 315 return nil 316 } 317 318 type resourcePoolExhaustedError string 319 320 func (err resourcePoolExhaustedError) Error() string { 321 return string(err) 322 } 323 324 func (ctx *Context) waitForCompletion(typ, desc, opName string, ignoreNotFound bool) error { 325 time.Sleep(3 * time.Second) 326 for { 327 time.Sleep(3 * time.Second) 328 var op *compute.Operation 329 err := ctx.apiCall(func() (err error) { 330 switch typ { 331 case "global": 332 op, err = ctx.computeService.GlobalOperations.Get(ctx.ProjectID, opName).Do() 333 case "zone": 334 op, err = ctx.computeService.ZoneOperations.Get(ctx.ProjectID, ctx.ZoneID, opName).Do() 335 default: 336 panic("unknown operation type: " + typ) 337 } 338 return 339 }) 340 if err != nil { 341 return fmt.Errorf("failed to get %v operation %v: %w", desc, opName, err) 342 } 343 switch op.Status { 344 case "PENDING", "RUNNING": 345 continue 346 case "DONE": 347 if op.Error != nil { 348 reason := "" 349 for _, operr := range op.Error.Errors { 350 if operr.Code == "ZONE_RESOURCE_POOL_EXHAUSTED" || 351 operr.Code == "ZONE_RESOURCE_POOL_EXHAUSTED_WITH_DETAILS" { 352 return resourcePoolExhaustedError(fmt.Sprintf("%+v", operr)) 353 } 354 if ignoreNotFound && operr.Code == "RESOURCE_NOT_FOUND" { 355 return nil 356 } 357 reason += fmt.Sprintf("%+v.", operr) 358 } 359 return fmt.Errorf("%v operation failed: %v", desc, reason) 360 } 361 return nil 362 default: 363 return fmt.Errorf("unknown %v operation status %q: %+v", desc, op.Status, op) 364 } 365 } 366 } 367 368 func (ctx *Context) getMeta(path string) (string, error) { 369 req, err := http.NewRequest("GET", "http://metadata.google.internal/computeMetadata/v1/"+path, nil) 370 if err != nil { 371 return "", err 372 } 373 req.Header.Add("Metadata-Flavor", "Google") 374 resp, err := http.DefaultClient.Do(req) 375 if err != nil { 376 return "", err 377 } 378 defer resp.Body.Close() 379 body, err := io.ReadAll(resp.Body) 380 if err != nil { 381 return "", err 382 } 383 return string(body), nil 384 } 385 386 func (ctx *Context) apiCall(fn func() error) error { 387 rateLimited := 0 388 for { 389 <-ctx.apiRateGate 390 err := fn() 391 if err != nil { 392 if strings.Contains(err.Error(), "Rate Limit Exceeded") || 393 strings.Contains(err.Error(), "rateLimitExceeded") { 394 rateLimited++ 395 backoff := time.Duration(float64(rateLimited) * 1e9 * (rand.Float64() + 1)) 396 time.Sleep(backoff) 397 if rateLimited < 20 { 398 continue 399 } 400 } 401 } 402 return err 403 } 404 } 405 406 var zoneNameRe = regexp.MustCompile("^[a-zA-Z0-9]*-[a-zA-Z0-9]*[-][a-zA-Z0-9]*$") 407 408 func validateZone(zone string) bool { 409 return zoneNameRe.MatchString(zone) 410 } 411 412 var regionNameRe = regexp.MustCompile("^[a-zA-Z0-9]*-[a-zA-Z0-9]*") 413 414 func zoneToRegion(zone string) string { 415 return regionNameRe.FindString(zone) 416 }