github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/pkg/gce/gce.go (about) 1 // Copyright 2016 syzkaller project authors. All rights reserved. 2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4 // Package gce provides wrappers around Google Compute Engine (GCE) APIs. 5 // It is assumed that the program itself also runs on GCE as APIs operate on the current project/zone. 6 // 7 // See https://cloud.google.com/compute/docs for details. 8 // In particular, API reference: 9 // https://cloud.google.com/compute/docs/reference/latest 10 // and Go API wrappers: 11 // https://godoc.org/google.golang.org/api/compute/v1 12 package gce 13 14 import ( 15 "context" 16 "errors" 17 "fmt" 18 "io" 19 "math/rand" 20 "net/http" 21 "regexp" 22 "strings" 23 "time" 24 25 "golang.org/x/oauth2" 26 "golang.org/x/oauth2/google" 27 "google.golang.org/api/compute/v1" 28 "google.golang.org/api/googleapi" 29 ) 30 31 type Context struct { 32 ProjectID string 33 ZoneID string 34 RegionID string 35 Instance string 36 InternalIP string 37 ExternalIP string 38 Network string 39 Subnetwork string 40 41 computeService *compute.Service 42 43 // apiCallTicker ticks regularly, preventing us from accidentally making 44 // GCE API calls too quickly. Our quota is 20 QPS, but we limit ourselves 45 // to less than that because several independent programs can do API calls. 46 apiRateGate <-chan time.Time 47 } 48 49 type CreateArgs struct { 50 Preemptible bool 51 DisplayDevice bool 52 } 53 54 func NewContext(customZoneID string) (*Context, error) { 55 ctx := &Context{ 56 apiRateGate: time.NewTicker(time.Second).C, 57 } 58 background := context.Background() 59 tokenSource, err := google.DefaultTokenSource(background, compute.CloudPlatformScope) 60 if err != nil { 61 return nil, fmt.Errorf("failed to get a token source: %w", err) 62 } 63 httpClient := oauth2.NewClient(background, tokenSource) 64 // nolint 65 // compute.New is deprecated: please use NewService instead. 66 // To provide a custom HTTP client, use option.WithHTTPClient. 67 // If you are using google.golang.org/api/googleapis/transport.APIKey, 68 // use option.WithAPIKey with NewService instead. 69 ctx.computeService, _ = compute.New(httpClient) 70 // Obtain project name, zone and current instance IP address. 71 ctx.ProjectID, err = ctx.getMeta("project/project-id") 72 if err != nil { 73 return nil, fmt.Errorf("failed to query gce project-id: %w", err) 74 } 75 myZoneID, err := ctx.getMeta("instance/zone") 76 if err != nil { 77 return nil, fmt.Errorf("failed to query gce zone: %w", err) 78 } 79 if i := strings.LastIndexByte(myZoneID, '/'); i != -1 { 80 myZoneID = myZoneID[i+1:] // the query returns some nonsense prefix 81 } 82 if customZoneID != "" { 83 ctx.ZoneID = customZoneID 84 } else { 85 ctx.ZoneID = myZoneID 86 } 87 if !validateZone(ctx.ZoneID) { 88 return nil, fmt.Errorf("%q is not a valid zone name", ctx.ZoneID) 89 } 90 ctx.RegionID = zoneToRegion(ctx.ZoneID) 91 if ctx.RegionID == "" { 92 return nil, fmt.Errorf("failed to extract region id from %s", ctx.ZoneID) 93 } 94 ctx.Instance, err = ctx.getMeta("instance/name") 95 if err != nil { 96 return nil, fmt.Errorf("failed to query gce instance name: %w", err) 97 } 98 inst, err := ctx.computeService.Instances.Get(ctx.ProjectID, myZoneID, ctx.Instance).Do() 99 if err != nil { 100 return nil, fmt.Errorf("error getting instance info: %w", err) 101 } 102 for _, iface := range inst.NetworkInterfaces { 103 if strings.HasPrefix(iface.NetworkIP, "10.") { 104 ctx.InternalIP = iface.NetworkIP 105 } 106 for _, ac := range iface.AccessConfigs { 107 if ac.NatIP != "" { 108 ctx.ExternalIP = ac.NatIP 109 } 110 } 111 ctx.Network = iface.Network 112 ctx.Subnetwork = iface.Subnetwork 113 } 114 if ctx.InternalIP == "" { 115 return nil, fmt.Errorf("failed to get current instance internal IP") 116 } 117 return ctx, nil 118 } 119 120 func (ctx *Context) CreateInstance(name, machineType, image, sshkey string, 121 preemptible, displayDevice bool) (string, error) { 122 prefix := "https://www.googleapis.com/compute/v1/projects/" + ctx.ProjectID 123 sshkeyAttr := "syzkaller:" + sshkey 124 oneAttr := "1" 125 falseAttr := false 126 instance := &compute.Instance{ 127 Name: name, 128 Description: "syzkaller worker", 129 MachineType: prefix + "/zones/" + ctx.ZoneID + "/machineTypes/" + machineType, 130 Disks: []*compute.AttachedDisk{ 131 { 132 AutoDelete: true, 133 Boot: true, 134 Type: "PERSISTENT", 135 InitializeParams: &compute.AttachedDiskInitializeParams{ 136 DiskName: name, 137 SourceImage: prefix + "/global/images/" + image, 138 }, 139 }, 140 }, 141 Metadata: &compute.Metadata{ 142 Items: []*compute.MetadataItems{ 143 { 144 Key: "ssh-keys", 145 Value: &sshkeyAttr, 146 }, 147 { 148 Key: "serial-port-enable", 149 Value: &oneAttr, 150 }, 151 }, 152 }, 153 NetworkInterfaces: []*compute.NetworkInterface{ 154 { 155 Network: ctx.Network, 156 Subnetwork: ctx.Subnetwork, 157 }, 158 }, 159 Scheduling: &compute.Scheduling{ 160 AutomaticRestart: &falseAttr, 161 Preemptible: preemptible, 162 OnHostMaintenance: "TERMINATE", 163 }, 164 DisplayDevice: &compute.DisplayDevice{ 165 EnableDisplay: displayDevice, 166 }, 167 } 168 retry: 169 if !instance.Scheduling.Preemptible && strings.HasPrefix(machineType, "e2-") { 170 // Otherwise we get "Error 400: Efficient instances do not support 171 // onHostMaintenance=TERMINATE unless they are preemptible". 172 instance.Scheduling.OnHostMaintenance = "MIGRATE" 173 } 174 var op *compute.Operation 175 err := ctx.apiCall(func() (err error) { 176 op, err = ctx.computeService.Instances.Insert(ctx.ProjectID, ctx.ZoneID, instance).Do() 177 return 178 }) 179 if err != nil { 180 return "", fmt.Errorf("failed to create instance: %w", err) 181 } 182 if err := ctx.waitForCompletion("zone", "create instance", op.Name, false); err != nil { 183 var resourcePoolExhaustedError resourcePoolExhaustedError 184 if errors.As(err, &resourcePoolExhaustedError) && instance.Scheduling.Preemptible { 185 instance.Scheduling.Preemptible = false 186 goto retry 187 } 188 return "", err 189 } 190 191 var inst *compute.Instance 192 err = ctx.apiCall(func() (err error) { 193 inst, err = ctx.computeService.Instances.Get(ctx.ProjectID, ctx.ZoneID, name).Do() 194 return 195 }) 196 if err != nil { 197 return "", fmt.Errorf("error getting instance %s details after creation: %w", name, err) 198 } 199 200 // Finds its internal IP. 201 ip := "" 202 for _, iface := range inst.NetworkInterfaces { 203 if strings.HasPrefix(iface.NetworkIP, "10.") { 204 ip = iface.NetworkIP 205 break 206 } 207 } 208 if ip == "" { 209 return "", fmt.Errorf("didn't find instance internal IP address") 210 } 211 return ip, nil 212 } 213 214 func (ctx *Context) DeleteInstance(name string, wait bool) error { 215 var op *compute.Operation 216 err := ctx.apiCall(func() (err error) { 217 op, err = ctx.computeService.Instances.Delete(ctx.ProjectID, ctx.ZoneID, name).Do() 218 return 219 }) 220 var apiErr *googleapi.Error 221 if errors.As(err, &apiErr) && apiErr.Code == 404 { 222 return nil 223 } 224 if err != nil { 225 return fmt.Errorf("failed to delete instance: %w", err) 226 } 227 if wait { 228 if err := ctx.waitForCompletion("zone", "delete image", op.Name, true); err != nil { 229 return err 230 } 231 } 232 return nil 233 } 234 235 func (ctx *Context) IsInstanceRunning(name string) bool { 236 var inst *compute.Instance 237 err := ctx.apiCall(func() (err error) { 238 inst, err = ctx.computeService.Instances.Get(ctx.ProjectID, ctx.ZoneID, name).Do() 239 return 240 }) 241 if err != nil { 242 return false 243 } 244 return inst.Status == "RUNNING" 245 } 246 247 func (ctx *Context) CreateImage(imageName, gcsFile string) error { 248 image := &compute.Image{ 249 Name: imageName, 250 RawDisk: &compute.ImageRawDisk{ 251 Source: "https://storage.googleapis.com/" + gcsFile, 252 }, 253 Licenses: []string{ 254 "https://www.googleapis.com/compute/v1/projects/vm-options/global/licenses/enable-vmx", 255 }, 256 } 257 var op *compute.Operation 258 err := ctx.apiCall(func() (err error) { 259 op, err = ctx.computeService.Images.Insert(ctx.ProjectID, image).Do() 260 return 261 }) 262 if err != nil { 263 // Try again without the vmx license in case it is not supported. 264 image.Licenses = nil 265 err := ctx.apiCall(func() (err error) { 266 op, err = ctx.computeService.Images.Insert(ctx.ProjectID, image).Do() 267 return 268 }) 269 if err != nil { 270 return fmt.Errorf("failed to create image: %w", err) 271 } 272 } 273 if err := ctx.waitForCompletion("global", "create image", op.Name, false); err != nil { 274 return err 275 } 276 return nil 277 } 278 279 func (ctx *Context) DeleteImage(imageName string) error { 280 var op *compute.Operation 281 err := ctx.apiCall(func() (err error) { 282 op, err = ctx.computeService.Images.Delete(ctx.ProjectID, imageName).Do() 283 return 284 }) 285 var apiErr *googleapi.Error 286 if errors.As(err, &apiErr) && apiErr.Code == 404 { 287 return nil 288 } 289 if err != nil { 290 return fmt.Errorf("failed to delete image: %w", err) 291 } 292 if err := ctx.waitForCompletion("global", "delete image", op.Name, true); err != nil { 293 return err 294 } 295 return nil 296 } 297 298 type resourcePoolExhaustedError string 299 300 func (err resourcePoolExhaustedError) Error() string { 301 return string(err) 302 } 303 304 func (ctx *Context) waitForCompletion(typ, desc, opName string, ignoreNotFound bool) error { 305 time.Sleep(3 * time.Second) 306 for { 307 time.Sleep(3 * time.Second) 308 var op *compute.Operation 309 err := ctx.apiCall(func() (err error) { 310 switch typ { 311 case "global": 312 op, err = ctx.computeService.GlobalOperations.Get(ctx.ProjectID, opName).Do() 313 case "zone": 314 op, err = ctx.computeService.ZoneOperations.Get(ctx.ProjectID, ctx.ZoneID, opName).Do() 315 default: 316 panic("unknown operation type: " + typ) 317 } 318 return 319 }) 320 if err != nil { 321 return fmt.Errorf("failed to get %v operation %v: %w", desc, opName, err) 322 } 323 switch op.Status { 324 case "PENDING", "RUNNING": 325 continue 326 case "DONE": 327 if op.Error != nil { 328 reason := "" 329 for _, operr := range op.Error.Errors { 330 if operr.Code == "ZONE_RESOURCE_POOL_EXHAUSTED" || 331 operr.Code == "ZONE_RESOURCE_POOL_EXHAUSTED_WITH_DETAILS" { 332 return resourcePoolExhaustedError(fmt.Sprintf("%+v", operr)) 333 } 334 if ignoreNotFound && operr.Code == "RESOURCE_NOT_FOUND" { 335 return nil 336 } 337 reason += fmt.Sprintf("%+v.", operr) 338 } 339 return fmt.Errorf("%v operation failed: %v", desc, reason) 340 } 341 return nil 342 default: 343 return fmt.Errorf("unknown %v operation status %q: %+v", desc, op.Status, op) 344 } 345 } 346 } 347 348 func (ctx *Context) getMeta(path string) (string, error) { 349 req, err := http.NewRequest("GET", "http://metadata.google.internal/computeMetadata/v1/"+path, nil) 350 if err != nil { 351 return "", err 352 } 353 req.Header.Add("Metadata-Flavor", "Google") 354 resp, err := http.DefaultClient.Do(req) 355 if err != nil { 356 return "", err 357 } 358 defer resp.Body.Close() 359 body, err := io.ReadAll(resp.Body) 360 if err != nil { 361 return "", err 362 } 363 return string(body), nil 364 } 365 366 func (ctx *Context) apiCall(fn func() error) error { 367 rateLimited := 0 368 for { 369 <-ctx.apiRateGate 370 err := fn() 371 if err != nil { 372 if strings.Contains(err.Error(), "Rate Limit Exceeded") || 373 strings.Contains(err.Error(), "rateLimitExceeded") { 374 rateLimited++ 375 backoff := time.Duration(float64(rateLimited) * 1e9 * (rand.Float64() + 1)) 376 time.Sleep(backoff) 377 if rateLimited < 20 { 378 continue 379 } 380 } 381 } 382 return err 383 } 384 } 385 386 var zoneNameRe = regexp.MustCompile("^[a-zA-Z0-9]*-[a-zA-Z0-9]*[-][a-zA-Z0-9]*$") 387 388 func validateZone(zone string) bool { 389 return zoneNameRe.MatchString(zone) 390 } 391 392 var regionNameRe = regexp.MustCompile("^[a-zA-Z0-9]*-[a-zA-Z0-9]*") 393 394 func zoneToRegion(zone string) string { 395 return regionNameRe.FindString(zone) 396 }