github.com/munnerz/test-infra@v0.0.0-20190108210205-ce3d181dc989/maintenance/aws-janitor/main.go (about) 1 /* 2 Copyright 2016 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package main 18 19 import ( 20 "bytes" 21 "encoding/json" 22 "flag" 23 "fmt" 24 "net/url" 25 "os" 26 "regexp" 27 "strings" 28 "time" 29 30 "github.com/aws/aws-sdk-go/aws" 31 "github.com/aws/aws-sdk-go/aws/awserr" 32 "github.com/aws/aws-sdk-go/aws/session" 33 "github.com/aws/aws-sdk-go/service/autoscaling" 34 "github.com/aws/aws-sdk-go/service/ec2" 35 "github.com/aws/aws-sdk-go/service/iam" 36 "github.com/aws/aws-sdk-go/service/route53" 37 "github.com/aws/aws-sdk-go/service/s3" 38 "github.com/golang/glog" 39 ) 40 41 const defaultRegion = "us-east-1" 42 43 var maxTTL = flag.Duration("ttl", 24*time.Hour, "Maximum time before we attempt deletion of a resource. Set to 0s to nuke all non-default resources.") 44 var path = flag.String("path", "", "S3 path to store mark data in (required)") 45 46 type awsResourceType interface { 47 // MarkAndSweep queries the resource in a specific region, using 48 // the provided session (which has account-number acct), calling 49 // res.Mark(<resource>) on each resource and deleting 50 // appropriately. 51 MarkAndSweep(sess *session.Session, acct string, region string, res *awsResourceSet) error 52 } 53 54 // AWS resource types known to this script, in dependency order. 
var awsResourceTypes = []awsResourceType{
	autoScalingGroups{},
	launchConfigurations{},
	instances{},
	// Addresses
	// NetworkInterfaces
	subnets{},
	securityGroups{},
	// NetworkACLs
	// VPN Connections
	internetGateways{},
	routeTables{},
	vpcs{},
	dhcpOptions{},
	volumes{},
	addresses{},
}

// Non-regional AWS resource types, in dependency order.
var globalAwsResourceTypes = []awsResourceType{
	iamInstanceProfiles{},
	iamRoles{},

	route53ResourceRecordSets{},
}

// awsResource is a single AWS object that can be tracked in an
// awsResourceSet.
type awsResource interface {
	// ARN returns the AWS ARN for the resource
	// (c.f. http://docs.aws.amazon.com/general/latest/gr/aws-arns-and-namespaces.html).
	// ARNs are intended to be globally unique across regions and
	// accounts.
	ARN() string

	// ResourceKey returns a per-resource key, because ARNs might
	// conflict if two objects with the same name are created at
	// different times (e.g. IAM roles).
	ResourceKey() string
}

// awsResourceSet keeps track of the first time we saw a particular
// ARN, and the global TTL. See Mark() for more details.
96 type awsResourceSet struct { 97 firstSeen map[string]time.Time // ARN -> first time we saw 98 marked map[string]bool // ARN -> seen this run 99 swept []string // List of resources we attempted to sweep (to summarize) 100 ttl time.Duration 101 } 102 103 func loadResourceSet(sess *session.Session, p *s3path, ttl time.Duration) (*awsResourceSet, error) { 104 s := &awsResourceSet{firstSeen: make(map[string]time.Time), marked: make(map[string]bool), ttl: ttl} 105 svc := s3.New(sess, &aws.Config{Region: aws.String(p.region)}) 106 resp, err := svc.GetObject(&s3.GetObjectInput{Bucket: aws.String(p.bucket), Key: aws.String(p.key)}) 107 if err != nil { 108 if awsErr, ok := err.(awserr.Error); ok && awsErr.Code() == "NoSuchKey" { 109 return s, nil 110 } 111 return nil, err 112 } 113 defer resp.Body.Close() 114 if err := json.NewDecoder(resp.Body).Decode(&s.firstSeen); err != nil { 115 return nil, err 116 } 117 return s, nil 118 } 119 120 func (s *awsResourceSet) Save(sess *session.Session, p *s3path) error { 121 b, err := json.MarshalIndent(s.firstSeen, "", " ") 122 if err != nil { 123 return err 124 } 125 svc := s3.New(sess, &aws.Config{Region: aws.String(p.region)}) 126 _, err = svc.PutObject(&s3.PutObjectInput{ 127 Bucket: aws.String(p.bucket), 128 Key: aws.String(p.key), 129 Body: bytes.NewReader(b), 130 CacheControl: aws.String("max-age=0"), 131 }) 132 return err 133 } 134 135 // Mark marks a particular resource as currently present, and advises 136 // on whether it should be deleted. If Mark(r) returns true, the TTL 137 // has expired for r and it should be deleted. 
138 func (s *awsResourceSet) Mark(r awsResource) bool { 139 key := r.ResourceKey() 140 now := time.Now() 141 142 s.marked[key] = true 143 if t, ok := s.firstSeen[key]; ok { 144 since := now.Sub(t) 145 if since > s.ttl { 146 s.swept = append(s.swept, key) 147 return true 148 } 149 glog.V(1).Infof("%s: seen for %v", key, since) 150 return false 151 } 152 s.firstSeen[key] = now 153 glog.V(1).Infof("%s: first seen", key) 154 if s.ttl == 0 { 155 // If the TTL is 0, it should be deleted now. 156 s.swept = append(s.swept, key) 157 return true 158 } 159 return false 160 } 161 162 // MarkComplete figures out which ARNs were in previous passes but not 163 // this one, and eliminates them. It should only be run after all 164 // resources have been marked. 165 func (s *awsResourceSet) MarkComplete() int { 166 var gone []string 167 for key := range s.firstSeen { 168 if !s.marked[key] { 169 gone = append(gone, key) 170 } 171 } 172 for _, key := range gone { 173 glog.V(1).Infof("%s: deleted since last run", key) 174 delete(s.firstSeen, key) 175 } 176 if len(s.swept) > 0 { 177 glog.Errorf("%d resources swept: %v", len(s.swept), s.swept) 178 } 179 return len(s.swept) 180 } 181 182 // Instances: https://docs.aws.amazon.com/sdk-for-go/api/service/ec2/#EC2.DescribeInstances 183 184 type instances struct{} 185 186 func (instances) MarkAndSweep(sess *session.Session, acct string, region string, set *awsResourceSet) error { 187 svc := ec2.New(sess, &aws.Config{Region: aws.String(region)}) 188 189 inp := &ec2.DescribeInstancesInput{ 190 Filters: []*ec2.Filter{ 191 { 192 Name: aws.String("instance-state-name"), 193 Values: []*string{aws.String("running"), aws.String("pending")}, 194 }, 195 }, 196 } 197 198 var toDelete []*string // Paged call, defer deletion until we have the whole list. 
199 if err := svc.DescribeInstancesPages(inp, func(page *ec2.DescribeInstancesOutput, _ bool) bool { 200 for _, res := range page.Reservations { 201 for _, inst := range res.Instances { 202 i := &instance{ 203 Account: acct, 204 Region: region, 205 InstanceID: *inst.InstanceId, 206 } 207 if set.Mark(i) { 208 glog.Warningf("%s: deleting %T: %v", i.ARN(), inst, inst) 209 toDelete = append(toDelete, inst.InstanceId) 210 } 211 } 212 } 213 return true 214 }); err != nil { 215 return err 216 } 217 if len(toDelete) > 0 { 218 // TODO(zmerlynn): In theory this should be split up into 219 // blocks of 1000, but burn that bridge if it ever happens... 220 _, err := svc.TerminateInstances(&ec2.TerminateInstancesInput{InstanceIds: toDelete}) 221 if err != nil { 222 glog.Warningf("termination failed: %v (for %v)", err, toDelete) 223 } 224 } 225 return nil 226 } 227 228 type instance struct { 229 Account string 230 Region string 231 InstanceID string 232 } 233 234 func (i instance) ARN() string { 235 return fmt.Sprintf("arn:aws:ec2:%s:%s:instance/%s", i.Region, i.Account, i.InstanceID) 236 } 237 238 func (i instance) ResourceKey() string { 239 return i.ARN() 240 } 241 242 // AutoScalingGroups: https://docs.aws.amazon.com/sdk-for-go/api/service/autoscaling/#AutoScaling.DescribeAutoScalingGroups 243 244 type autoScalingGroups struct{} 245 246 func (autoScalingGroups) MarkAndSweep(sess *session.Session, acct string, region string, set *awsResourceSet) error { 247 svc := autoscaling.New(sess, &aws.Config{Region: aws.String(region)}) 248 249 var toDelete []*autoScalingGroup // Paged call, defer deletion until we have the whole list. 
250 if err := svc.DescribeAutoScalingGroupsPages(nil, func(page *autoscaling.DescribeAutoScalingGroupsOutput, _ bool) bool { 251 for _, asg := range page.AutoScalingGroups { 252 a := &autoScalingGroup{ID: *asg.AutoScalingGroupARN, Name: *asg.AutoScalingGroupName} 253 if set.Mark(a) { 254 glog.Warningf("%s: deleting %T: %v", a.ARN(), asg, asg) 255 toDelete = append(toDelete, a) 256 } 257 } 258 return true 259 }); err != nil { 260 return err 261 } 262 for _, asg := range toDelete { 263 _, err := svc.DeleteAutoScalingGroup( 264 &autoscaling.DeleteAutoScalingGroupInput{ 265 AutoScalingGroupName: aws.String(asg.Name), 266 ForceDelete: aws.Bool(true), 267 }) 268 if err != nil { 269 glog.Warningf("%v: delete failed: %v", asg.ARN(), err) 270 } 271 } 272 // Block on ASGs finishing deletion. There are a lot of dependent 273 // resources, so this just makes the rest go more smoothly (and 274 // prevents a second pass). 275 for _, asg := range toDelete { 276 glog.Warningf("%v: waiting for delete", asg.ARN()) 277 err := svc.WaitUntilGroupNotExists( 278 &autoscaling.DescribeAutoScalingGroupsInput{ 279 AutoScalingGroupNames: []*string{aws.String(asg.Name)}, 280 }) 281 if err != nil { 282 glog.Warningf("%v: wait failed: %v", asg.ARN(), err) 283 } 284 } 285 return nil 286 } 287 288 type autoScalingGroup struct { 289 ID string 290 Name string 291 } 292 293 func (asg autoScalingGroup) ARN() string { 294 return asg.ID 295 } 296 297 func (asg autoScalingGroup) ResourceKey() string { 298 return asg.ARN() 299 } 300 301 // LaunchConfigurations: http://docs.aws.amazon.com/sdk-for-go/api/service/autoscaling/#AutoScaling.DescribeLaunchConfigurations 302 303 type launchConfigurations struct{} 304 305 func (launchConfigurations) MarkAndSweep(sess *session.Session, acct string, region string, set *awsResourceSet) error { 306 svc := autoscaling.New(sess, &aws.Config{Region: aws.String(region)}) 307 308 var toDelete []*launchConfiguration // Paged call, defer deletion until we have the whole 
list. 309 if err := svc.DescribeLaunchConfigurationsPages(nil, func(page *autoscaling.DescribeLaunchConfigurationsOutput, _ bool) bool { 310 for _, lc := range page.LaunchConfigurations { 311 l := &launchConfiguration{ID: *lc.LaunchConfigurationARN, Name: *lc.LaunchConfigurationName} 312 if set.Mark(l) { 313 glog.Warningf("%s: deleting %T: %v", l.ARN(), lc, lc) 314 toDelete = append(toDelete, l) 315 } 316 } 317 return true 318 }); err != nil { 319 return err 320 } 321 for _, lc := range toDelete { 322 _, err := svc.DeleteLaunchConfiguration( 323 &autoscaling.DeleteLaunchConfigurationInput{ 324 LaunchConfigurationName: aws.String(lc.Name), 325 }) 326 if err != nil { 327 glog.Warningf("%v: delete failed: %v", lc.ARN(), err) 328 } 329 } 330 return nil 331 } 332 333 type launchConfiguration struct { 334 ID string 335 Name string 336 } 337 338 func (lc launchConfiguration) ARN() string { 339 return lc.ID 340 } 341 342 func (lc launchConfiguration) ResourceKey() string { 343 return lc.ARN() 344 } 345 346 // Subnets: https://docs.aws.amazon.com/sdk-for-go/api/service/ec2/#EC2.DescribeSubnets 347 348 type subnets struct{} 349 350 func (subnets) MarkAndSweep(sess *session.Session, acct string, region string, set *awsResourceSet) error { 351 svc := ec2.New(sess, &aws.Config{Region: aws.String(region)}) 352 353 resp, err := svc.DescribeSubnets(&ec2.DescribeSubnetsInput{ 354 Filters: []*ec2.Filter{ 355 { 356 Name: aws.String("defaultForAz"), 357 Values: []*string{aws.String("false")}, 358 }, 359 }, 360 }) 361 if err != nil { 362 return err 363 } 364 365 for _, sub := range resp.Subnets { 366 s := &subnet{Account: acct, Region: region, ID: *sub.SubnetId} 367 if set.Mark(s) { 368 glog.Warningf("%s: deleting %T: %v", s.ARN(), sub, sub) 369 _, err := svc.DeleteSubnet(&ec2.DeleteSubnetInput{SubnetId: sub.SubnetId}) 370 if err != nil { 371 glog.Warningf("%v: delete failed: %v", s.ARN(), err) 372 } 373 } 374 } 375 return nil 376 } 377 378 type subnet struct { 379 Account string 380 
Region string 381 ID string 382 } 383 384 func (sub subnet) ARN() string { 385 return fmt.Sprintf("arn:aws:ec2:%s:%s:subnet/%s", sub.Region, sub.Account, sub.ID) 386 } 387 388 func (sub subnet) ResourceKey() string { 389 return sub.ARN() 390 } 391 392 // SecurityGroups: https://docs.aws.amazon.com/sdk-for-go/api/service/ec2/#EC2.DescribeSecurityGroups 393 394 type securityGroups struct{} 395 396 type sgRef struct { 397 id string 398 perm *ec2.IpPermission 399 } 400 401 func addRefs(refs map[string][]*sgRef, id string, acct string, perms []*ec2.IpPermission) { 402 for _, perm := range perms { 403 for _, pair := range perm.UserIdGroupPairs { 404 // Ignore cross-account for now, and skip circular refs. 405 if *pair.UserId == acct && *pair.GroupId != id { 406 refs[*pair.GroupId] = append(refs[*pair.GroupId], &sgRef{id: id, perm: perm}) 407 } 408 } 409 } 410 } 411 412 func (securityGroups) MarkAndSweep(sess *session.Session, acct string, region string, set *awsResourceSet) error { 413 svc := ec2.New(sess, &aws.Config{Region: aws.String(region)}) 414 415 resp, err := svc.DescribeSecurityGroups(nil) 416 if err != nil { 417 return err 418 } 419 420 var toDelete []*securityGroup // Deferred to disentangle referencing security groups 421 ingress := make(map[string][]*sgRef) // sg.GroupId -> [sg.GroupIds with this ingress] 422 egress := make(map[string][]*sgRef) // sg.GroupId -> [sg.GroupIds with this egress] 423 for _, sg := range resp.SecurityGroups { 424 if *sg.GroupName == "default" { 425 // TODO(zmerlynn): Is there really no better way to detect this? 
426 continue 427 } 428 s := &securityGroup{Account: acct, Region: region, ID: *sg.GroupId} 429 addRefs(ingress, *sg.GroupId, acct, sg.IpPermissions) 430 addRefs(egress, *sg.GroupId, acct, sg.IpPermissionsEgress) 431 if set.Mark(s) { 432 glog.Warningf("%s: deleting %T: %v", s.ARN(), sg, sg) 433 toDelete = append(toDelete, s) 434 } 435 } 436 for _, sg := range toDelete { 437 for _, ref := range ingress[sg.ID] { 438 glog.Infof("%v: revoking reference from %v", sg.ARN(), ref.id) 439 _, err := svc.RevokeSecurityGroupIngress(&ec2.RevokeSecurityGroupIngressInput{ 440 GroupId: aws.String(ref.id), 441 IpPermissions: []*ec2.IpPermission{ref.perm}, 442 }) 443 if err != nil { 444 glog.Warningf("%v: failed to revoke ingress reference from %v: %v", sg.ARN(), ref.id, err) 445 } 446 } 447 for _, ref := range egress[sg.ID] { 448 _, err := svc.RevokeSecurityGroupEgress(&ec2.RevokeSecurityGroupEgressInput{ 449 GroupId: aws.String(ref.id), 450 IpPermissions: []*ec2.IpPermission{ref.perm}, 451 }) 452 if err != nil { 453 glog.Warningf("%v: failed to revoke egress reference from %v: %v", sg.ARN(), ref.id, err) 454 } 455 } 456 _, err := svc.DeleteSecurityGroup(&ec2.DeleteSecurityGroupInput{GroupId: aws.String(sg.ID)}) 457 if err != nil { 458 glog.Warningf("%v: delete failed: %v", sg.ARN(), err) 459 } 460 } 461 return nil 462 } 463 464 type securityGroup struct { 465 Account string 466 Region string 467 ID string 468 } 469 470 func (sg securityGroup) ARN() string { 471 return fmt.Sprintf("arn:aws:ec2:%s:%s:security-group/%s", sg.Region, sg.Account, sg.ID) 472 } 473 474 func (sg securityGroup) ResourceKey() string { 475 return sg.ARN() 476 } 477 478 // InternetGateways: https://docs.aws.amazon.com/sdk-for-go/api/service/ec2/#EC2.DescribeInternetGateways 479 480 type internetGateways struct{} 481 482 func (internetGateways) MarkAndSweep(sess *session.Session, acct string, region string, set *awsResourceSet) error { 483 svc := ec2.New(sess, &aws.Config{Region: aws.String(region)}) 484 485 
resp, err := svc.DescribeInternetGateways(nil) 486 if err != nil { 487 return err 488 } 489 490 vpcResp, err := svc.DescribeVpcs(&ec2.DescribeVpcsInput{ 491 Filters: []*ec2.Filter{ 492 { 493 Name: aws.String("isDefault"), 494 Values: []*string{aws.String("true")}, 495 }, 496 }, 497 }) 498 if err != nil { 499 return err 500 } 501 502 defaultVpc := vpcResp.Vpcs[0] 503 504 for _, ig := range resp.InternetGateways { 505 i := &internetGateway{Account: acct, Region: region, ID: *ig.InternetGatewayId} 506 if set.Mark(i) { 507 isDefault := false 508 glog.Warningf("%s: deleting %T: %v", i.ARN(), ig, ig) 509 for _, att := range ig.Attachments { 510 if att.VpcId == defaultVpc.VpcId { 511 isDefault = true 512 break 513 } 514 _, err := svc.DetachInternetGateway(&ec2.DetachInternetGatewayInput{ 515 InternetGatewayId: ig.InternetGatewayId, 516 VpcId: att.VpcId, 517 }) 518 if err != nil { 519 glog.Warningf("%v: detach from %v failed: %v", i.ARN(), *att.VpcId, err) 520 } 521 } 522 if isDefault { 523 glog.Infof("%s: skipping delete as IGW is the default for the VPC %T: %v", i.ARN(), ig, ig) 524 continue 525 } 526 _, err := svc.DeleteInternetGateway(&ec2.DeleteInternetGatewayInput{InternetGatewayId: ig.InternetGatewayId}) 527 if err != nil { 528 glog.Warningf("%v: delete failed: %v", i.ARN(), err) 529 } 530 } 531 } 532 return nil 533 } 534 535 type internetGateway struct { 536 Account string 537 Region string 538 ID string 539 } 540 541 func (ig internetGateway) ARN() string { 542 return fmt.Sprintf("arn:aws:ec2:%s:%s:internet-gateway/%s", ig.Region, ig.Account, ig.ID) 543 } 544 545 func (ig internetGateway) ResourceKey() string { 546 return ig.ARN() 547 } 548 549 // RouteTables: https://docs.aws.amazon.com/sdk-for-go/api/service/ec2/#EC2.DescribeRouteTables 550 551 type routeTables struct{} 552 553 func (routeTables) MarkAndSweep(sess *session.Session, acct string, region string, set *awsResourceSet) error { 554 svc := ec2.New(sess, &aws.Config{Region: aws.String(region)}) 555 556 
resp, err := svc.DescribeRouteTables(nil) 557 if err != nil { 558 return err 559 } 560 561 for _, rt := range resp.RouteTables { 562 // Filter out the RouteTables that have a main 563 // association. Given the documentation for the main.association 564 // filter, you'd think we could filter on the Describe, but it 565 // doesn't actually work, see e.g. 566 // https://github.com/aws/aws-cli/issues/1810 567 main := false 568 for _, assoc := range rt.Associations { 569 main = main || *assoc.Main 570 } 571 if main { 572 continue 573 } 574 r := &routeTable{Account: acct, Region: region, ID: *rt.RouteTableId} 575 if set.Mark(r) { 576 for _, assoc := range rt.Associations { 577 glog.Infof("%v: disassociating from %v", r.ARN(), *assoc.SubnetId) 578 _, err := svc.DisassociateRouteTable(&ec2.DisassociateRouteTableInput{ 579 AssociationId: assoc.RouteTableAssociationId}) 580 if err != nil { 581 glog.Warningf("%v: disassociation from subnet %v failed: %v", r.ARN(), *assoc.SubnetId, err) 582 } 583 } 584 glog.Warningf("%s: deleting %T: %v", r.ARN(), rt, rt) 585 _, err := svc.DeleteRouteTable(&ec2.DeleteRouteTableInput{RouteTableId: rt.RouteTableId}) 586 if err != nil { 587 glog.Warningf("%v: delete failed: %v", r.ARN(), err) 588 } 589 } 590 } 591 return nil 592 } 593 594 type routeTable struct { 595 Account string 596 Region string 597 ID string 598 } 599 600 func (rt routeTable) ARN() string { 601 return fmt.Sprintf("arn:aws:ec2:%s:%s:route-table/%s", rt.Region, rt.Account, rt.ID) 602 } 603 604 func (rt routeTable) ResourceKey() string { 605 return rt.ARN() 606 } 607 608 // VPCs: https://docs.aws.amazon.com/sdk-for-go/api/service/ec2/#EC2.DescribeVpcs 609 610 type vpcs struct{} 611 612 func (vpcs) MarkAndSweep(sess *session.Session, acct string, region string, set *awsResourceSet) error { 613 svc := ec2.New(sess, &aws.Config{Region: aws.String(region)}) 614 615 resp, err := svc.DescribeVpcs(&ec2.DescribeVpcsInput{ 616 Filters: []*ec2.Filter{ 617 { 618 Name: 
aws.String("isDefault"), 619 Values: []*string{aws.String("false")}, 620 }, 621 }, 622 }) 623 if err != nil { 624 return err 625 } 626 627 for _, vp := range resp.Vpcs { 628 v := &vpc{Account: acct, Region: region, ID: *vp.VpcId} 629 if set.Mark(v) { 630 glog.Warningf("%s: deleting %T: %v", v.ARN(), vp, vp) 631 if vp.DhcpOptionsId != nil && *vp.DhcpOptionsId != "default" { 632 _, err := svc.AssociateDhcpOptions(&ec2.AssociateDhcpOptionsInput{ 633 VpcId: vp.VpcId, 634 DhcpOptionsId: aws.String("default"), 635 }) 636 if err != nil { 637 glog.Warningf("%v: disassociating DHCP option set %v failed: %v", v.ARN(), vp.DhcpOptionsId, err) 638 } 639 } 640 _, err := svc.DeleteVpc(&ec2.DeleteVpcInput{VpcId: vp.VpcId}) 641 if err != nil { 642 glog.Warningf("%v: delete failed: %v", v.ARN(), err) 643 } 644 } 645 } 646 return nil 647 } 648 649 type vpc struct { 650 Account string 651 Region string 652 ID string 653 } 654 655 func (vp vpc) ARN() string { 656 return fmt.Sprintf("arn:aws:ec2:%s:%s:vpc/%s", vp.Region, vp.Account, vp.ID) 657 } 658 659 func (vp vpc) ResourceKey() string { 660 return vp.ARN() 661 } 662 663 // DhcpOptions: https://docs.aws.amazon.com/sdk-for-go/api/service/ec2/#EC2.DescribeDhcpOptions 664 665 type dhcpOptions struct{} 666 667 func (dhcpOptions) MarkAndSweep(sess *session.Session, acct string, region string, set *awsResourceSet) error { 668 svc := ec2.New(sess, &aws.Config{Region: aws.String(region)}) 669 670 // This is a little gross, but I can't find an easier way to 671 // figure out the DhcpOptions associated with the default VPC. 
672 defaultRefs := make(map[string]bool) 673 { 674 resp, err := svc.DescribeVpcs(&ec2.DescribeVpcsInput{ 675 Filters: []*ec2.Filter{ 676 { 677 Name: aws.String("isDefault"), 678 Values: []*string{aws.String("true")}, 679 }, 680 }, 681 }) 682 if err != nil { 683 return err 684 } 685 for _, vpc := range resp.Vpcs { 686 defaultRefs[*vpc.DhcpOptionsId] = true 687 } 688 } 689 690 resp, err := svc.DescribeDhcpOptions(nil) 691 if err != nil { 692 return err 693 } 694 695 var defaults []string 696 for _, dhcp := range resp.DhcpOptions { 697 if defaultRefs[*dhcp.DhcpOptionsId] { 698 continue 699 } 700 // Separately, skip any "default looking" DHCP Option Sets. See comment below. 701 if defaultLookingDHCPOptions(dhcp, region) { 702 defaults = append(defaults, *dhcp.DhcpOptionsId) 703 continue 704 } 705 dh := &dhcpOption{Account: acct, Region: region, ID: *dhcp.DhcpOptionsId} 706 if set.Mark(dh) { 707 glog.Warningf("%s: deleting %T: %v", dh.ARN(), dhcp, dhcp) 708 _, err := svc.DeleteDhcpOptions(&ec2.DeleteDhcpOptionsInput{DhcpOptionsId: dhcp.DhcpOptionsId}) 709 if err != nil { 710 glog.Warningf("%v: delete failed: %v", dh.ARN(), err) 711 } 712 } 713 } 714 if len(defaults) > 1 { 715 glog.Errorf("Found more than one default-looking DHCP option set: %v", defaults) 716 } 717 return nil 718 } 719 720 // defaultLookingDHCPOptions: This part is a little annoying. If 721 // you're running in a region with where there is no default-looking 722 // DHCP option set, when you create any VPC, AWS will create a 723 // default-looking DHCP option set for you. If you then re-associate 724 // or delete the VPC, the option set will hang around. However, if you 725 // have a default-looking DHCP option set (even with no default VPC) 726 // and create a VPC, AWS will associate the VPC with the DHCP option 727 // set of the default VPC. There's no signal as to whether the option 728 // set returned is the default or was created along with the 729 // VPC. 
Because of this, we just skip these during cleanup - there 730 // will only ever be one default set per region. 731 func defaultLookingDHCPOptions(dhcp *ec2.DhcpOptions, region string) bool { 732 if len(dhcp.Tags) != 0 { 733 return false 734 } 735 for _, conf := range dhcp.DhcpConfigurations { 736 if *conf.Key == "domain-name" { 737 var domain string 738 if region == "us-east-1" { 739 domain = "ec2.internal" 740 } else { 741 domain = region + ".compute.internal" 742 } 743 if len(conf.Values) != 1 || *conf.Values[0].Value != domain { 744 return false 745 } 746 } else if *conf.Key == "domain-name-servers" { 747 if len(conf.Values) != 1 || *conf.Values[0].Value != "AmazonProvidedDNS" { 748 return false 749 } 750 } else { 751 return false 752 } 753 } 754 return true 755 } 756 757 type dhcpOption struct { 758 Account string 759 Region string 760 ID string 761 } 762 763 func (dhcp dhcpOption) ARN() string { 764 return fmt.Sprintf("arn:aws:ec2:%s:%s:dhcp-option/%s", dhcp.Region, dhcp.Account, dhcp.ID) 765 } 766 767 func (dhcp dhcpOption) ResourceKey() string { 768 return dhcp.ARN() 769 } 770 771 // Volumes: https://docs.aws.amazon.com/sdk-for-go/api/service/ec2/#EC2.DescribeVolumes 772 773 type volumes struct{} 774 775 func (volumes) MarkAndSweep(sess *session.Session, acct string, region string, set *awsResourceSet) error { 776 svc := ec2.New(sess, &aws.Config{Region: aws.String(region)}) 777 778 var toDelete []*volume // Paged call, defer deletion until we have the whole list. 
779 if err := svc.DescribeVolumesPages(nil, func(page *ec2.DescribeVolumesOutput, _ bool) bool { 780 for _, vol := range page.Volumes { 781 v := &volume{Account: acct, Region: region, ID: *vol.VolumeId} 782 if set.Mark(v) { 783 glog.Warningf("%s: deleting %T: %v", v.ARN(), vol, vol) 784 toDelete = append(toDelete, v) 785 } 786 } 787 return true 788 }); err != nil { 789 return err 790 } 791 for _, vol := range toDelete { 792 _, err := svc.DeleteVolume(&ec2.DeleteVolumeInput{VolumeId: aws.String(vol.ID)}) 793 if err != nil { 794 glog.Warningf("%v: delete failed: %v", vol.ARN(), err) 795 } 796 } 797 return nil 798 } 799 800 type volume struct { 801 Account string 802 Region string 803 ID string 804 } 805 806 func (vol volume) ARN() string { 807 return fmt.Sprintf("arn:aws:ec2:%s:%s:volume/%s", vol.Region, vol.Account, vol.ID) 808 } 809 810 func (vol volume) ResourceKey() string { 811 return vol.ARN() 812 } 813 814 // Elastic IPs: https://docs.aws.amazon.com/sdk-for-go/api/service/ec2/#EC2.DescribeAddresses 815 816 type addresses struct{} 817 818 func (addresses) MarkAndSweep(sess *session.Session, acct string, region string, set *awsResourceSet) error { 819 svc := ec2.New(sess, &aws.Config{Region: aws.String(region)}) 820 821 resp, err := svc.DescribeAddresses(nil) 822 if err != nil { 823 return err 824 } 825 826 for _, addr := range resp.Addresses { 827 a := &address{Account: acct, Region: region, ID: *addr.AllocationId} 828 if set.Mark(a) { 829 glog.Warningf("%s: deleting %T: %v", a.ARN(), addr, addr) 830 if addr.AssociationId != nil { 831 glog.Warningf("%s: disassociating %T from active instance", a.ARN(), addr) 832 _, err := svc.DisassociateAddress(&ec2.DisassociateAddressInput{AssociationId: addr.AssociationId}) 833 if err != nil { 834 glog.Warningf("%s: disassociating %T failed: %v", a.ARN(), addr, err) 835 } 836 } 837 _, err := svc.ReleaseAddress(&ec2.ReleaseAddressInput{AllocationId: addr.AllocationId}) 838 if err != nil { 839 glog.Warningf("%v: delete failed: 
%v", a.ARN(), err) 840 } 841 } 842 } 843 return nil 844 } 845 846 type address struct { 847 Account string 848 Region string 849 ID string 850 } 851 852 func (addr address) ARN() string { 853 // This ARN is a complete hallucination - there doesn't seem to be 854 // an ARN for elastic IPs. 855 return fmt.Sprintf("arn:aws:ec2:%s:%s:address/%s", addr.Region, addr.Account, addr.ID) 856 } 857 858 func (addr address) ResourceKey() string { 859 return addr.ARN() 860 } 861 862 // IAM Roles 863 864 type iamRoles struct{} 865 866 // roleIsManaged checks if the role should be managed (and thus deleted) by us 867 // In particular, we want to avoid "system" AWS roles or roles that might support test-infra 868 func roleIsManaged(role *iam.Role) bool { 869 name := aws.StringValue(role.RoleName) 870 path := aws.StringValue(role.Path) 871 872 // Most AWS system roles are in a directory called `aws-service-role` 873 if strings.HasPrefix(path, "/aws-service-role/") { 874 return false 875 } 876 877 // kops roles have names start with `masters.` or `nodes.` 878 if strings.HasPrefix(name, "masters.") { 879 return true 880 } 881 if strings.HasPrefix(name, "nodes.") { 882 return true 883 } 884 885 glog.Infof("unknown role name=%q, path=%q; assuming not managed", name, path) 886 return false 887 } 888 889 func (iamRoles) MarkAndSweep(sess *session.Session, acct string, region string, set *awsResourceSet) error { 890 svc := iam.New(sess, &aws.Config{Region: aws.String(region)}) 891 892 var toDelete []*iamRole // Paged call, defer deletion until we have the whole list. 
893 if err := svc.ListRolesPages(&iam.ListRolesInput{}, func(page *iam.ListRolesOutput, _ bool) bool { 894 for _, r := range page.Roles { 895 if !roleIsManaged(r) { 896 continue 897 } 898 899 l := &iamRole{arn: aws.StringValue(r.Arn), roleID: aws.StringValue(r.RoleId), roleName: aws.StringValue(r.RoleName)} 900 if set.Mark(l) { 901 glog.Warningf("%s: deleting %T: %v", l.ARN(), r, r) 902 toDelete = append(toDelete, l) 903 } 904 } 905 return true 906 }); err != nil { 907 return err 908 } 909 910 for _, r := range toDelete { 911 if err := r.delete(svc); err != nil { 912 glog.Warningf("%v: delete failed: %v", r.ARN(), err) 913 } 914 } 915 return nil 916 } 917 918 type iamRole struct { 919 arn string 920 roleID string 921 roleName string 922 } 923 924 func (r iamRole) ARN() string { 925 return r.arn 926 } 927 928 func (r iamRole) ResourceKey() string { 929 return r.roleID + "::" + r.ARN() 930 } 931 932 func (r iamRole) delete(svc *iam.IAM) error { 933 roleName := r.roleName 934 935 var policyNames []string 936 { 937 request := &iam.ListRolePoliciesInput{ 938 RoleName: aws.String(roleName), 939 } 940 err := svc.ListRolePoliciesPages(request, func(page *iam.ListRolePoliciesOutput, lastPage bool) bool { 941 for _, policyName := range page.PolicyNames { 942 policyNames = append(policyNames, aws.StringValue(policyName)) 943 } 944 return true 945 }) 946 if err != nil { 947 return fmt.Errorf("error listing IAM role policies for %q: %v", roleName, err) 948 } 949 } 950 951 for _, policyName := range policyNames { 952 glog.V(2).Infof("Deleting IAM role policy %q %q", roleName, policyName) 953 request := &iam.DeleteRolePolicyInput{ 954 RoleName: aws.String(roleName), 955 PolicyName: aws.String(policyName), 956 } 957 _, err := svc.DeleteRolePolicy(request) 958 if err != nil { 959 return fmt.Errorf("error deleting IAM role policy %q %q: %v", roleName, policyName, err) 960 } 961 } 962 963 { 964 glog.V(2).Infof("Deleting IAM role %q", roleName) 965 request := &iam.DeleteRoleInput{ 966 
RoleName: aws.String(roleName), 967 } 968 _, err := svc.DeleteRole(request) 969 if err != nil { 970 return fmt.Errorf("error deleting IAM role %q: %v", roleName, err) 971 } 972 } 973 974 return nil 975 } 976 977 // IAM Instance Profiles 978 979 type iamInstanceProfiles struct{} 980 981 func (iamInstanceProfiles) MarkAndSweep(sess *session.Session, acct string, region string, set *awsResourceSet) error { 982 svc := iam.New(sess, &aws.Config{Region: aws.String(region)}) 983 984 var toDelete []*iamInstanceProfile // Paged call, defer deletion until we have the whole list. 985 if err := svc.ListInstanceProfilesPages(&iam.ListInstanceProfilesInput{}, func(page *iam.ListInstanceProfilesOutput, _ bool) bool { 986 for _, p := range page.InstanceProfiles { 987 // We treat an instance profile as managed if all its roles are 988 managed := true 989 if len(p.Roles) == 0 { 990 // Just in case... 991 managed = false 992 } 993 for _, r := range p.Roles { 994 if !roleIsManaged(r) { 995 managed = false 996 } 997 } 998 if !managed { 999 glog.Infof("ignoring unmanaged profile %s", aws.StringValue(p.Arn)) 1000 continue 1001 } 1002 1003 o := &iamInstanceProfile{profile: p} 1004 if set.Mark(o) { 1005 glog.Warningf("%s: deleting %T: %v", o.ARN(), o, o) 1006 toDelete = append(toDelete, o) 1007 } 1008 } 1009 return true 1010 }); err != nil { 1011 return err 1012 } 1013 1014 for _, o := range toDelete { 1015 if err := o.delete(svc); err != nil { 1016 glog.Warningf("%v: delete failed: %v", o.ARN(), err) 1017 } 1018 } 1019 return nil 1020 } 1021 1022 type iamInstanceProfile struct { 1023 profile *iam.InstanceProfile 1024 } 1025 1026 func (p iamInstanceProfile) ARN() string { 1027 return aws.StringValue(p.profile.Arn) 1028 } 1029 1030 func (p iamInstanceProfile) ResourceKey() string { 1031 return aws.StringValue(p.profile.InstanceProfileId) + "::" + p.ARN() 1032 } 1033 1034 func (p iamInstanceProfile) delete(svc *iam.IAM) error { 1035 // We need to unlink the roles first, before we can delete 
the instance profile 1036 { 1037 for _, role := range p.profile.Roles { 1038 request := &iam.RemoveRoleFromInstanceProfileInput{ 1039 InstanceProfileName: p.profile.InstanceProfileName, 1040 RoleName: role.RoleName, 1041 } 1042 if _, err := svc.RemoveRoleFromInstanceProfile(request); err != nil { 1043 return fmt.Errorf("error removing role %q: %v", aws.StringValue(role.RoleName), err) 1044 } 1045 } 1046 } 1047 1048 // Delete the instance profile 1049 { 1050 request := &iam.DeleteInstanceProfileInput{ 1051 InstanceProfileName: p.profile.InstanceProfileName, 1052 } 1053 if _, err := svc.DeleteInstanceProfile(request); err != nil { 1054 return err 1055 } 1056 } 1057 1058 return nil 1059 } 1060 1061 // Route53 1062 1063 type route53ResourceRecordSets struct{} 1064 1065 // zoneIsManaged checks if the zone should be managed (and thus have records deleted) by us 1066 func zoneIsManaged(z *route53.HostedZone) bool { 1067 // TODO: Move to a tag on the zone? 1068 name := aws.StringValue(z.Name) 1069 if "test-cncf-aws.k8s.io." == name { 1070 return true 1071 } 1072 1073 glog.Infof("unknown zone %q; ignoring", name) 1074 return false 1075 } 1076 1077 var managedNameRegexes = []*regexp.Regexp{ 1078 // e.g. api.e2e-61246-dba53.test-cncf-aws.k8s.io. 1079 regexp.MustCompile(`^api\.e2e-[0-9]+-`), 1080 1081 // e.g. api.internal.e2e-61246-dba53.test-cncf-aws.k8s.io. 1082 regexp.MustCompile(`^api\.internal\.e2e-[0-9]+-`), 1083 1084 // e.g. etcd-b.internal.e2e-61246-dba53.test-cncf-aws.k8s.io. 1085 regexp.MustCompile(`^etcd-[a-z]\.internal\.e2e-[0-9]+-`), 1086 1087 // e.g. etcd-events-b.internal.e2e-61246-dba53.test-cncf-aws.k8s.io. 
1088 regexp.MustCompile(`^etcd-events-[a-z]\.internal\.e2e-[0-9]+-`), 1089 } 1090 1091 // resourceRecordSetIsManaged checks if the resource record should be managed (and thus deleted) by us 1092 func resourceRecordSetIsManaged(rrs *route53.ResourceRecordSet) bool { 1093 if "A" != aws.StringValue(rrs.Type) { 1094 return false 1095 } 1096 1097 name := aws.StringValue(rrs.Name) 1098 1099 for _, managedNameRegex := range managedNameRegexes { 1100 if managedNameRegex.MatchString(name) { 1101 return true 1102 } 1103 } 1104 1105 glog.Infof("ignoring unmanaged name %q", name) 1106 return false 1107 } 1108 1109 func (route53ResourceRecordSets) MarkAndSweep(sess *session.Session, acct string, region string, set *awsResourceSet) error { 1110 svc := route53.New(sess, &aws.Config{Region: aws.String(region)}) 1111 1112 var listError error 1113 1114 err := svc.ListHostedZonesPages(&route53.ListHostedZonesInput{}, func(zones *route53.ListHostedZonesOutput, _ bool) bool { 1115 for _, z := range zones.HostedZones { 1116 if !zoneIsManaged(z) { 1117 continue 1118 } 1119 1120 // Because route53 has such low rate limits, we collect the changes per-zone, to minimize API calls 1121 1122 var toDelete []*route53ResourceRecordSet 1123 1124 err := svc.ListResourceRecordSetsPages(&route53.ListResourceRecordSetsInput{HostedZoneId: z.Id}, func(records *route53.ListResourceRecordSetsOutput, _ bool) bool { 1125 for _, rrs := range records.ResourceRecordSets { 1126 if !resourceRecordSetIsManaged(rrs) { 1127 continue 1128 } 1129 1130 o := &route53ResourceRecordSet{zone: z, obj: rrs} 1131 if set.Mark(o) { 1132 glog.Warningf("%s: deleting %T: %v", o.ARN(), rrs, rrs) 1133 toDelete = append(toDelete, o) 1134 } 1135 } 1136 return true 1137 }) 1138 if err != nil { 1139 listError = err 1140 return false 1141 } 1142 1143 var changes []*route53.Change 1144 for _, rrs := range toDelete { 1145 change := &route53.Change{ 1146 Action: aws.String(route53.ChangeActionDelete), 1147 ResourceRecordSet: rrs.obj, 1148 
} 1149 1150 changes = append(changes, change) 1151 } 1152 1153 for len(changes) != 0 { 1154 // Limit of 1000 changes per request 1155 chunk := changes 1156 if len(chunk) > 1000 { 1157 chunk = chunk[:1000] 1158 changes = changes[1000:] 1159 } else { 1160 changes = nil 1161 } 1162 glog.Infof("deleting %d route53 resource records", len(chunk)) 1163 deleteRequest := &route53.ChangeResourceRecordSetsInput{ 1164 HostedZoneId: z.Id, 1165 ChangeBatch: &route53.ChangeBatch{Changes: chunk}, 1166 } 1167 1168 if _, err := svc.ChangeResourceRecordSets(deleteRequest); err != nil { 1169 glog.Warningf("unable to delete DNS records: %v", err) 1170 } 1171 } 1172 } 1173 return true 1174 }) 1175 1176 if listError != nil { 1177 return listError 1178 } 1179 if err != nil { 1180 return err 1181 } 1182 1183 return nil 1184 } 1185 1186 type route53ResourceRecordSet struct { 1187 zone *route53.HostedZone 1188 obj *route53.ResourceRecordSet 1189 } 1190 1191 func (r route53ResourceRecordSet) ARN() string { 1192 return "route53::" + aws.StringValue(r.zone.Id) + "::" + aws.StringValue(r.obj.Type) + "::" + aws.StringValue(r.obj.Name) 1193 } 1194 1195 func (r route53ResourceRecordSet) ResourceKey() string { 1196 return r.ARN() 1197 } 1198 1199 // ARNs (used for uniquifying within our previous mark file) 1200 1201 type arn struct { 1202 partition string 1203 service string 1204 region string 1205 account string 1206 resourceType string 1207 resource string 1208 } 1209 1210 func parseARN(s string) (*arn, error) { 1211 pieces := strings.Split(s, ":") 1212 if len(pieces) != 6 || pieces[0] != "arn" || pieces[1] != "aws" { 1213 return nil, fmt.Errorf("Invalid AWS ARN: %v", s) 1214 } 1215 var resourceType string 1216 var resource string 1217 res := strings.SplitN(pieces[5], "/", 2) 1218 if len(res) == 1 { 1219 resource = res[0] 1220 } else { 1221 resourceType = res[0] 1222 resource = res[1] 1223 } 1224 return &arn{ 1225 partition: pieces[1], 1226 service: pieces[2], 1227 region: pieces[3], 1228 account: 
pieces[4], 1229 resourceType: resourceType, 1230 resource: resource, 1231 }, nil 1232 } 1233 1234 func getAccount(sess *session.Session, region string) (string, error) { 1235 svc := iam.New(sess, &aws.Config{Region: aws.String(region)}) 1236 resp, err := svc.GetUser(nil) 1237 if err != nil { 1238 return "", err 1239 } 1240 arn, err := parseARN(*resp.User.Arn) 1241 if err != nil { 1242 return "", err 1243 } 1244 return arn.account, nil 1245 } 1246 1247 type s3path struct { 1248 region string 1249 bucket string 1250 key string 1251 } 1252 1253 func getS3Path(sess *session.Session, s string) (*s3path, error) { 1254 url, err := url.Parse(s) 1255 if err != nil { 1256 return nil, err 1257 } 1258 if url.Scheme != "s3" { 1259 return nil, fmt.Errorf("Scheme %q != 's3'", url.Scheme) 1260 } 1261 svc := s3.New(sess, &aws.Config{Region: aws.String(defaultRegion)}) 1262 resp, err := svc.GetBucketLocation(&s3.GetBucketLocationInput{Bucket: aws.String(url.Host)}) 1263 if err != nil { 1264 return nil, err 1265 } 1266 region := "us-east-1" 1267 if resp.LocationConstraint != nil { 1268 region = *resp.LocationConstraint 1269 } 1270 return &s3path{region: region, bucket: url.Host, key: url.Path}, nil 1271 } 1272 1273 func getRegions(sess *session.Session) ([]string, error) { 1274 var regions []string 1275 svc := ec2.New(sess, &aws.Config{Region: aws.String(defaultRegion)}) 1276 resp, err := svc.DescribeRegions(nil) 1277 if err != nil { 1278 return nil, err 1279 } 1280 for _, region := range resp.Regions { 1281 regions = append(regions, *region.RegionName) 1282 } 1283 return regions, nil 1284 } 1285 1286 func main() { 1287 flag.Lookup("logtostderr").Value.Set("true") 1288 flag.Parse() 1289 1290 // Retry aggressively (with default back-off). If the account is 1291 // in a really bad state, we may be contending with API rate 1292 // limiting and fighting against the very resources we're trying 1293 // to delete. 
1294 sess := session.Must(session.NewSessionWithOptions(session.Options{Config: aws.Config{MaxRetries: aws.Int(100)}})) 1295 1296 s3p, err := getS3Path(sess, *path) 1297 if err != nil { 1298 glog.Fatalf("--path %q isn't a valid S3 path: %v", *path, err) 1299 } 1300 acct, err := getAccount(sess, defaultRegion) 1301 if err != nil { 1302 glog.Fatalf("error getting current user: %v", err) 1303 } 1304 glog.V(1).Infof("account: %s", acct) 1305 regions, err := getRegions(sess) 1306 if err != nil { 1307 glog.Fatalf("error getting available regions: %v", err) 1308 } 1309 glog.V(1).Infof("regions: %v", regions) 1310 1311 res, err := loadResourceSet(sess, s3p, *maxTTL) 1312 if err != nil { 1313 glog.Fatalf("error loading %q: %v", *path, err) 1314 } 1315 for _, region := range regions { 1316 for _, typ := range awsResourceTypes { 1317 if err := typ.MarkAndSweep(sess, acct, region, res); err != nil { 1318 glog.Errorf("error sweeping %T: %v", typ, err) 1319 return 1320 } 1321 } 1322 } 1323 1324 for _, typ := range globalAwsResourceTypes { 1325 if err := typ.MarkAndSweep(sess, acct, "us-east-1", res); err != nil { 1326 glog.Errorf("error sweeping %T: %v", typ, err) 1327 return 1328 } 1329 } 1330 1331 swept := res.MarkComplete() 1332 if err := res.Save(sess, s3p); err != nil { 1333 glog.Fatalf("error saving %q: %v", *path, err) 1334 } 1335 if swept > 0 { 1336 os.Exit(1) 1337 } 1338 }