github.com/axw/juju@v0.0.0-20161005053422-4bd6544d08d4/provider/ec2/environ.go (about) 1 // Copyright 2011-2014 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package ec2 5 6 import ( 7 "fmt" 8 "math/rand" 9 "net" 10 "strings" 11 "sync" 12 "time" 13 14 "github.com/juju/errors" 15 "github.com/juju/retry" 16 "github.com/juju/utils" 17 "github.com/juju/utils/clock" 18 "gopkg.in/amz.v3/ec2" 19 "gopkg.in/amz.v3/s3" 20 "gopkg.in/juju/names.v2" 21 22 "github.com/juju/juju/cloudconfig/instancecfg" 23 "github.com/juju/juju/cloudconfig/providerinit" 24 "github.com/juju/juju/constraints" 25 "github.com/juju/juju/environs" 26 "github.com/juju/juju/environs/config" 27 "github.com/juju/juju/environs/instances" 28 "github.com/juju/juju/environs/simplestreams" 29 "github.com/juju/juju/environs/tags" 30 "github.com/juju/juju/instance" 31 "github.com/juju/juju/network" 32 "github.com/juju/juju/provider/common" 33 "github.com/juju/juju/tools" 34 ) 35 36 const ( 37 invalidParameterValue = "InvalidParameterValue" 38 39 // tagName is the AWS-specific tag key that populates resources' 40 // name columns in the console. 41 tagName = "Name" 42 ) 43 44 var ( 45 // Use shortAttempt to poll for short-term events or for retrying API calls. 46 // TODO(katco): 2016-08-09: lp:1611427 47 shortAttempt = utils.AttemptStrategy{ 48 Total: 5 * time.Second, 49 Delay: 200 * time.Millisecond, 50 } 51 52 // aliveInstanceStates are the states which we filter by when listing 53 // instances in an environment. 54 aliveInstanceStates = []string{"pending", "running"} 55 ) 56 57 type environ struct { 58 name string 59 cloud environs.CloudSpec 60 ec2 *ec2.EC2 61 s3 *s3.S3 62 63 // ecfgMutex protects the *Unlocked fields below. 64 ecfgMutex sync.Mutex 65 ecfgUnlocked *environConfig 66 67 availabilityZonesMutex sync.Mutex 68 availabilityZones []common.AvailabilityZone 69 } 70 71 func (e *environ) Config() *config.Config { 72 return e.ecfg().Config 73 } 74 75 func (e *environ) SetConfig(cfg *config.Config) error { 76 ecfg, err := providerInstance.newConfig(cfg) 77 if err != nil { 78 return errors.Trace(err) 79 } 80 e.ecfgMutex.Lock() 81 e.ecfgUnlocked = ecfg 82 e.ecfgMutex.Unlock() 83 return nil 84 } 85 86 func (e *environ) ecfg() *environConfig { 87 e.ecfgMutex.Lock() 88 ecfg := e.ecfgUnlocked 89 e.ecfgMutex.Unlock() 90 return ecfg 91 } 92 93 func (e *environ) Name() string { 94 return e.name 95 } 96 97 // PrepareForBootstrap is part of the Environ interface. 98 func (env *environ) PrepareForBootstrap(ctx environs.BootstrapContext) error { 99 if ctx.ShouldVerifyCredentials() { 100 if err := verifyCredentials(env); err != nil { 101 return err 102 } 103 } 104 ecfg := env.ecfg() 105 vpcID, forceVPCID := ecfg.vpcID(), ecfg.forceVPCID() 106 if err := validateBootstrapVPC(env.ec2, env.cloud.Region, vpcID, forceVPCID, ctx); err != nil { 107 return errors.Trace(err) 108 } 109 return nil 110 } 111 112 // Create is part of the Environ interface. 113 func (env *environ) Create(args environs.CreateParams) error { 114 if err := verifyCredentials(env); err != nil { 115 return err 116 } 117 vpcID := env.ecfg().vpcID() 118 if err := validateModelVPC(env.ec2, env.name, vpcID); err != nil { 119 return errors.Trace(err) 120 } 121 // TODO(axw) 2016-08-04 #1609643 122 // Create global security group(s) here. 123 return nil 124 } 125 126 func (env *environ) validateVPC(logInfof func(string, ...interface{}), badge string) error { 127 return nil 128 } 129 130 // Bootstrap is part of the Environ interface. 131 func (e *environ) Bootstrap(ctx environs.BootstrapContext, args environs.BootstrapParams) (*environs.BootstrapResult, error) { 132 return common.Bootstrap(ctx, e, args) 133 } 134 135 // SupportsSpaces is specified on environs.Networking. 136 func (e *environ) SupportsSpaces() (bool, error) { 137 return true, nil 138 } 139 140 // SupportsSpaceDiscovery is specified on environs.Networking. 141 func (e *environ) SupportsSpaceDiscovery() (bool, error) { 142 return false, nil 143 } 144 145 var unsupportedConstraints = []string{ 146 constraints.Tags, 147 // TODO(anastasiamac 2016-03-16) LP#1557874 148 // use virt-type in StartInstances 149 constraints.VirtType, 150 } 151 152 // ConstraintsValidator is defined on the Environs interface. 153 func (e *environ) ConstraintsValidator() (constraints.Validator, error) { 154 validator := constraints.NewValidator() 155 validator.RegisterConflicts( 156 []string{constraints.InstanceType}, 157 []string{constraints.Mem, constraints.Cores, constraints.CpuPower}) 158 validator.RegisterUnsupported(unsupportedConstraints) 159 instTypeNames := make([]string, len(allInstanceTypes)) 160 for i, itype := range allInstanceTypes { 161 instTypeNames[i] = itype.Name 162 } 163 validator.RegisterVocabulary(constraints.InstanceType, instTypeNames) 164 return validator, nil 165 } 166 167 func archMatches(arches []string, arch *string) bool { 168 if arch == nil { 169 return true 170 } 171 for _, a := range arches { 172 if a == *arch { 173 return true 174 } 175 } 176 return false 177 } 178 179 var ec2AvailabilityZones = (*ec2.EC2).AvailabilityZones 180 181 type ec2AvailabilityZone struct { 182 ec2.AvailabilityZoneInfo 183 } 184 185 func (z *ec2AvailabilityZone) Name() string { 186 return z.AvailabilityZoneInfo.Name 187 } 188 189 func (z *ec2AvailabilityZone) Available() bool { 190 return z.AvailabilityZoneInfo.State == availableState 191 } 192 193 // AvailabilityZones returns a slice of availability zones 194 // for the configured region. 195 func (e *environ) AvailabilityZones() ([]common.AvailabilityZone, error) { 196 e.availabilityZonesMutex.Lock() 197 defer e.availabilityZonesMutex.Unlock() 198 if e.availabilityZones == nil { 199 filter := ec2.NewFilter() 200 filter.Add("region-name", e.cloud.Region) 201 resp, err := ec2AvailabilityZones(e.ec2, filter) 202 if err != nil { 203 return nil, err 204 } 205 logger.Debugf("availability zones: %+v", resp) 206 e.availabilityZones = make([]common.AvailabilityZone, len(resp.Zones)) 207 for i, z := range resp.Zones { 208 e.availabilityZones[i] = &ec2AvailabilityZone{z} 209 } 210 } 211 return e.availabilityZones, nil 212 } 213 214 // InstanceAvailabilityZoneNames returns the availability zone names for each 215 // of the specified instances. 216 func (e *environ) InstanceAvailabilityZoneNames(ids []instance.Id) ([]string, error) { 217 instances, err := e.Instances(ids) 218 if err != nil && err != environs.ErrPartialInstances { 219 return nil, err 220 } 221 zones := make([]string, len(instances)) 222 for i, inst := range instances { 223 if inst == nil { 224 continue 225 } 226 zones[i] = inst.(*ec2Instance).AvailZone 227 } 228 return zones, err 229 } 230 231 type ec2Placement struct { 232 availabilityZone ec2.AvailabilityZoneInfo 233 } 234 235 func (e *environ) parsePlacement(placement string) (*ec2Placement, error) { 236 pos := strings.IndexRune(placement, '=') 237 if pos == -1 { 238 return nil, fmt.Errorf("unknown placement directive: %v", placement) 239 } 240 switch key, value := placement[:pos], placement[pos+1:]; key { 241 case "zone": 242 availabilityZone := value 243 zones, err := e.AvailabilityZones() 244 if err != nil { 245 return nil, err 246 } 247 for _, z := range zones { 248 if z.Name() == availabilityZone { 249 return &ec2Placement{ 250 z.(*ec2AvailabilityZone).AvailabilityZoneInfo, 251 }, nil 252 } 253 } 254 return nil, fmt.Errorf("invalid availability zone %q", availabilityZone) 255 } 256 return nil, fmt.Errorf("unknown placement directive: %v", placement) 257 } 258 259 // PrecheckInstance is defined on the state.Prechecker interface. 260 func (e *environ) PrecheckInstance(series string, cons constraints.Value, placement string) error { 261 if placement != "" { 262 if _, err := e.parsePlacement(placement); err != nil { 263 return err 264 } 265 } 266 if !cons.HasInstanceType() { 267 return nil 268 } 269 // Constraint has an instance-type constraint so let's see if it is valid. 270 for _, itype := range allInstanceTypes { 271 if itype.Name != *cons.InstanceType { 272 continue 273 } 274 if archMatches(itype.Arches, cons.Arch) { 275 return nil 276 } 277 } 278 if cons.Arch == nil { 279 return fmt.Errorf("invalid AWS instance type %q specified", *cons.InstanceType) 280 } 281 return fmt.Errorf("invalid AWS instance type %q and arch %q specified", *cons.InstanceType, *cons.Arch) 282 } 283 284 // MetadataLookupParams returns parameters which are used to query simplestreams metadata. 285 func (e *environ) MetadataLookupParams(region string) (*simplestreams.MetadataLookupParams, error) { 286 if region == "" { 287 region = e.cloud.Region 288 } 289 cloudSpec, err := e.cloudSpec(region) 290 if err != nil { 291 return nil, err 292 } 293 return &simplestreams.MetadataLookupParams{ 294 Series: config.PreferredSeries(e.ecfg()), 295 Region: cloudSpec.Region, 296 Endpoint: cloudSpec.Endpoint, 297 }, nil 298 } 299 300 // Region is specified in the HasRegion interface. 301 func (e *environ) Region() (simplestreams.CloudSpec, error) { 302 return e.cloudSpec(e.cloud.Region) 303 } 304 305 func (e *environ) cloudSpec(region string) (simplestreams.CloudSpec, error) { 306 ec2Region, ok := allRegions[region] 307 if !ok { 308 return simplestreams.CloudSpec{}, fmt.Errorf("unknown region %q", region) 309 } 310 return simplestreams.CloudSpec{ 311 Region: region, 312 Endpoint: ec2Region.EC2Endpoint, 313 }, nil 314 } 315 316 const ( 317 ebsStorage = "ebs" 318 ssdStorage = "ssd" 319 ) 320 321 // DistributeInstances implements the state.InstanceDistributor policy. 322 func (e *environ) DistributeInstances(candidates, distributionGroup []instance.Id) ([]instance.Id, error) { 323 return common.DistributeInstances(e, candidates, distributionGroup) 324 } 325 326 var availabilityZoneAllocations = common.AvailabilityZoneAllocations 327 328 // MaintainInstance is specified in the InstanceBroker interface. 329 func (*environ) MaintainInstance(args environs.StartInstanceParams) error { 330 return nil 331 } 332 333 // resourceName returns the string to use for a resource's Name tag, 334 // to help users identify Juju-managed resources in the AWS console. 335 func resourceName(tag names.Tag, envName string) string { 336 return fmt.Sprintf("juju-%s-%s", envName, tag) 337 } 338 339 // StartInstance is specified in the InstanceBroker interface. 340 func (e *environ) StartInstance(args environs.StartInstanceParams) (_ *environs.StartInstanceResult, resultErr error) { 341 if args.ControllerUUID == "" { 342 return nil, errors.New("missing controller UUID") 343 } 344 var inst *ec2Instance 345 defer func() { 346 if resultErr == nil || inst == nil { 347 return 348 } 349 if err := e.StopInstances(inst.Id()); err != nil { 350 logger.Errorf("error stopping failed instance: %v", err) 351 } 352 }() 353 354 var availabilityZones []string 355 if args.Placement != "" { 356 placement, err := e.parsePlacement(args.Placement) 357 if err != nil { 358 return nil, err 359 } 360 if placement.availabilityZone.State != availableState { 361 return nil, errors.Errorf("availability zone %q is %s", placement.availabilityZone.Name, placement.availabilityZone.State) 362 } 363 availabilityZones = append(availabilityZones, placement.availabilityZone.Name) 364 } 365 366 // If no availability zone is specified, then automatically spread across 367 // the known zones for optimal spread across the instance distribution 368 // group. 369 var zoneInstances []common.AvailabilityZoneInstances 370 if len(availabilityZones) == 0 { 371 var err error 372 var group []instance.Id 373 if args.DistributionGroup != nil { 374 group, err = args.DistributionGroup() 375 if err != nil { 376 return nil, err 377 } 378 } 379 zoneInstances, err = availabilityZoneAllocations(e, group) 380 if err != nil { 381 return nil, err 382 } 383 for _, z := range zoneInstances { 384 availabilityZones = append(availabilityZones, z.ZoneName) 385 } 386 if len(availabilityZones) == 0 { 387 return nil, errors.New("failed to determine availability zones") 388 } 389 } 390 391 arches := args.Tools.Arches() 392 393 spec, err := findInstanceSpec(args.ImageMetadata, &instances.InstanceConstraint{ 394 Region: e.cloud.Region, 395 Series: args.InstanceConfig.Series, 396 Arches: arches, 397 Constraints: args.Constraints, 398 Storage: []string{ssdStorage, ebsStorage}, 399 }) 400 if err != nil { 401 return nil, err 402 } 403 tools, err := args.Tools.Match(tools.Filter{Arch: spec.Image.Arch}) 404 if err != nil { 405 return nil, errors.Errorf("chosen architecture %v not present in %v", spec.Image.Arch, arches) 406 } 407 408 if spec.InstanceType.Deprecated { 409 logger.Infof("deprecated instance type specified: %s", spec.InstanceType.Name) 410 } 411 412 if err := args.InstanceConfig.SetTools(tools); err != nil { 413 return nil, errors.Trace(err) 414 } 415 if err := instancecfg.FinishInstanceConfig(args.InstanceConfig, e.Config()); err != nil { 416 return nil, err 417 } 418 419 userData, err := providerinit.ComposeUserData(args.InstanceConfig, nil, AmazonRenderer{}) 420 if err != nil { 421 return nil, errors.Annotate(err, "cannot make user data") 422 } 423 logger.Debugf("ec2 user data; %d bytes", len(userData)) 424 var apiPort int 425 if args.InstanceConfig.Controller != nil { 426 apiPort = args.InstanceConfig.Controller.Config.APIPort() 427 } else { 428 apiPort = args.InstanceConfig.APIInfo.Ports()[0] 429 } 430 groups, err := e.setUpGroups(args.ControllerUUID, args.InstanceConfig.MachineId, apiPort) 431 if err != nil { 432 return nil, errors.Annotate(err, "cannot set up groups") 433 } 434 435 blockDeviceMappings := getBlockDeviceMappings(args.Constraints, args.InstanceConfig.Series) 436 rootDiskSize := uint64(blockDeviceMappings[0].VolumeSize) * 1024 437 438 // If --constraints spaces=foo was passed, the provisioner will populate 439 // args.SubnetsToZones map. In AWS a subnet can span only one zone, so here 440 // we build the reverse map zonesToSubnets, which we will use to below in 441 // the RunInstance loop to provide an explicit subnet ID, rather than just 442 // AZ. This ensures instances in the same group (units of a service or all 443 // instances when adding a machine manually) will still be evenly 444 // distributed across AZs, but only within subnets of the space constraint. 445 // 446 // TODO(dimitern): This should be done in a provider-independant way. 447 if spaces := args.Constraints.IncludeSpaces(); len(spaces) > 1 { 448 logger.Infof("ignoring all but the first positive space from constraints: %v", spaces) 449 } 450 451 var instResp *ec2.RunInstancesResp 452 commonRunArgs := &ec2.RunInstances{ 453 MinCount: 1, 454 MaxCount: 1, 455 UserData: userData, 456 InstanceType: spec.InstanceType.Name, 457 SecurityGroups: groups, 458 BlockDeviceMappings: blockDeviceMappings, 459 ImageId: spec.Image.Id, 460 } 461 462 haveVPCID := isVPCIDSet(e.ecfg().vpcID()) 463 464 for _, zone := range availabilityZones { 465 runArgs := commonRunArgs 466 runArgs.AvailZone = zone 467 468 var subnetIDsForZone []string 469 var subnetErr error 470 if haveVPCID { 471 var allowedSubnetIDs []string 472 for subnetID, _ := range args.SubnetsToZones { 473 allowedSubnetIDs = append(allowedSubnetIDs, string(subnetID)) 474 } 475 subnetIDsForZone, subnetErr = getVPCSubnetIDsForAvailabilityZone(e.ec2, e.ecfg().vpcID(), zone, allowedSubnetIDs) 476 } else if args.Constraints.HaveSpaces() { 477 subnetIDsForZone, subnetErr = findSubnetIDsForAvailabilityZone(zone, args.SubnetsToZones) 478 } 479 480 switch { 481 case subnetErr != nil && errors.IsNotFound(subnetErr): 482 logger.Infof("no matching subnets in zone %q; assuming zone is constrained and trying another", zone) 483 continue 484 case subnetErr != nil: 485 return nil, errors.Annotatef(subnetErr, "getting subnets for zone %q", zone) 486 case len(subnetIDsForZone) > 1: 487 // With multiple equally suitable subnets, picking one at random 488 // will allow for better instance spread within the same zone, and 489 // still work correctly if we happen to pick a constrained subnet 490 // (we'll just treat this the same way we treat constrained zones 491 // and retry). 492 runArgs.SubnetId = subnetIDsForZone[rand.Intn(len(subnetIDsForZone))] 493 logger.Infof( 494 "selected random subnet %q from all matching in zone %q: %v", 495 runArgs.SubnetId, zone, subnetIDsForZone, 496 ) 497 case len(subnetIDsForZone) == 1: 498 runArgs.SubnetId = subnetIDsForZone[0] 499 logger.Infof("selected subnet %q in zone %q", runArgs.SubnetId, zone) 500 } 501 502 instResp, err = runInstances(e.ec2, runArgs) 503 if err == nil || !isZoneOrSubnetConstrainedError(err) { 504 break 505 } 506 507 logger.Infof("%q is constrained, trying another availability zone", zone) 508 } 509 510 if err != nil { 511 return nil, errors.Annotate(err, "cannot run instances") 512 } 513 if len(instResp.Instances) != 1 { 514 return nil, errors.Errorf("expected 1 started instance, got %d", len(instResp.Instances)) 515 } 516 517 inst = &ec2Instance{ 518 e: e, 519 Instance: &instResp.Instances[0], 520 } 521 instAZ := inst.Instance.AvailZone 522 if haveVPCID { 523 instVPC := e.ecfg().vpcID() 524 instSubnet := inst.Instance.SubnetId 525 logger.Infof("started instance %q in AZ %q, subnet %q, VPC %q", inst.Id(), instAZ, instSubnet, instVPC) 526 } else { 527 logger.Infof("started instance %q in AZ %q", inst.Id(), instAZ) 528 } 529 530 // Tag instance, for accounting and identification. 531 instanceName := resourceName( 532 names.NewMachineTag(args.InstanceConfig.MachineId), e.Config().Name(), 533 ) 534 args.InstanceConfig.Tags[tagName] = instanceName 535 if err := tagResources(e.ec2, args.InstanceConfig.Tags, string(inst.Id())); err != nil { 536 return nil, errors.Annotate(err, "tagging instance") 537 } 538 539 // Tag the machine's root EBS volume, if it has one. 540 if inst.Instance.RootDeviceType == "ebs" { 541 cfg := e.Config() 542 tags := tags.ResourceTags( 543 names.NewModelTag(cfg.UUID()), 544 names.NewControllerTag(args.ControllerUUID), 545 cfg, 546 ) 547 tags[tagName] = instanceName + "-root" 548 if err := tagRootDisk(e.ec2, tags, inst.Instance); err != nil { 549 return nil, errors.Annotate(err, "tagging root disk") 550 } 551 } 552 553 hc := instance.HardwareCharacteristics{ 554 Arch: &spec.Image.Arch, 555 Mem: &spec.InstanceType.Mem, 556 CpuCores: &spec.InstanceType.CpuCores, 557 CpuPower: spec.InstanceType.CpuPower, 558 RootDisk: &rootDiskSize, 559 // Tags currently not supported by EC2 560 AvailabilityZone: &inst.Instance.AvailZone, 561 } 562 return &environs.StartInstanceResult{ 563 Instance: inst, 564 Hardware: &hc, 565 }, nil 566 } 567 568 // tagResources calls ec2.CreateTags, tagging each of the specified resources 569 // with the given tags. tagResources will retry for a short period of time 570 // if it receives a *.NotFound error response from EC2. 571 func tagResources(e *ec2.EC2, tags map[string]string, resourceIds ...string) error { 572 if len(tags) == 0 { 573 return nil 574 } 575 ec2Tags := make([]ec2.Tag, 0, len(tags)) 576 for k, v := range tags { 577 ec2Tags = append(ec2Tags, ec2.Tag{k, v}) 578 } 579 var err error 580 for a := shortAttempt.Start(); a.Next(); { 581 _, err = e.CreateTags(resourceIds, ec2Tags) 582 if err == nil || !strings.HasSuffix(ec2ErrCode(err), ".NotFound") { 583 return err 584 } 585 } 586 return err 587 } 588 589 func tagRootDisk(e *ec2.EC2, tags map[string]string, inst *ec2.Instance) error { 590 if len(tags) == 0 { 591 return nil 592 } 593 findVolumeId := func(inst *ec2.Instance) string { 594 for _, m := range inst.BlockDeviceMappings { 595 if m.DeviceName != inst.RootDeviceName { 596 continue 597 } 598 return m.VolumeId 599 } 600 return "" 601 } 602 // Wait until the instance has an associated EBS volume in the 603 // block-device-mapping. 604 volumeId := findVolumeId(inst) 605 // TODO(katco): 2016-08-09: lp:1611427 606 waitRootDiskAttempt := utils.AttemptStrategy{ 607 Total: 5 * time.Minute, 608 Delay: 5 * time.Second, 609 } 610 for a := waitRootDiskAttempt.Start(); volumeId == "" && a.Next(); { 611 resp, err := e.Instances([]string{inst.InstanceId}, nil) 612 if err = errors.Annotate(err, "cannot fetch instance information"); err != nil { 613 logger.Warningf("%v", err) 614 if a.HasNext() == false { 615 return err 616 } 617 logger.Infof("retrying fetch of instances") 618 continue 619 } 620 if len(resp.Reservations) > 0 && len(resp.Reservations[0].Instances) > 0 { 621 inst = &resp.Reservations[0].Instances[0] 622 volumeId = findVolumeId(inst) 623 } 624 } 625 if volumeId == "" { 626 return errors.New("timed out waiting for EBS volume to be associated") 627 } 628 return tagResources(e, tags, volumeId) 629 } 630 631 var runInstances = _runInstances 632 633 // runInstances calls ec2.RunInstances for a fixed number of attempts until 634 // RunInstances returns an error code that does not indicate an error that 635 // may be caused by eventual consistency. 636 func _runInstances(e *ec2.EC2, ri *ec2.RunInstances) (resp *ec2.RunInstancesResp, err error) { 637 for a := shortAttempt.Start(); a.Next(); { 638 resp, err = e.RunInstances(ri) 639 if err == nil || !isNotFoundError(err) { 640 break 641 } 642 } 643 return resp, err 644 } 645 646 func (e *environ) StopInstances(ids ...instance.Id) error { 647 return errors.Trace(e.terminateInstances(ids)) 648 } 649 650 // groupInfoByName returns information on the security group 651 // with the given name including rules and other details. 652 func (e *environ) groupInfoByName(groupName string) (ec2.SecurityGroupInfo, error) { 653 resp, err := e.securityGroupsByNameOrID(groupName) 654 if err != nil { 655 return ec2.SecurityGroupInfo{}, err 656 } 657 658 if len(resp.Groups) != 1 { 659 return ec2.SecurityGroupInfo{}, errors.NewNotFound(fmt.Errorf( 660 "expected one security group named %q, got %v", 661 groupName, resp.Groups, 662 ), "") 663 } 664 return resp.Groups[0], nil 665 } 666 667 // groupByName returns the security group with the given name. 668 func (e *environ) groupByName(groupName string) (ec2.SecurityGroup, error) { 669 groupInfo, err := e.groupInfoByName(groupName) 670 return groupInfo.SecurityGroup, err 671 } 672 673 // isNotFoundError returns whether err is a typed NotFoundError or an EC2 error 674 // code for "group not found", indicating no matching instances (as they are 675 // filtered by group). 676 func isNotFoundError(err error) bool { 677 return err != nil && (errors.IsNotFound(err) || ec2ErrCode(err) == "InvalidGroup.NotFound") 678 } 679 680 // Instances is part of the environs.Environ interface. 681 func (e *environ) Instances(ids []instance.Id) ([]instance.Instance, error) { 682 if len(ids) == 0 { 683 return nil, nil 684 } 685 insts := make([]instance.Instance, len(ids)) 686 // Make a series of requests to cope with eventual consistency. 687 // Each request will attempt to add more instances to the requested 688 // set. 689 var err error 690 for a := shortAttempt.Start(); a.Next(); { 691 var need []string 692 for i, inst := range insts { 693 if inst == nil { 694 need = append(need, string(ids[i])) 695 } 696 } 697 filter := ec2.NewFilter() 698 filter.Add("instance-state-name", aliveInstanceStates...) 699 filter.Add("instance-id", need...) 700 e.addModelFilter(filter) 701 err = e.gatherInstances(ids, insts, filter) 702 if err == nil || err != environs.ErrPartialInstances { 703 break 704 } 705 } 706 if err == environs.ErrPartialInstances { 707 for _, inst := range insts { 708 if inst != nil { 709 return insts, environs.ErrPartialInstances 710 } 711 } 712 return nil, environs.ErrNoInstances 713 } 714 if err != nil { 715 return nil, err 716 } 717 return insts, nil 718 } 719 720 // gatherInstances tries to get information on each instance 721 // id whose corresponding insts slot is nil. 722 // 723 // This function returns environs.ErrPartialInstances if the 724 // insts slice has not been completely filled. 725 func (e *environ) gatherInstances( 726 ids []instance.Id, 727 insts []instance.Instance, 728 filter *ec2.Filter, 729 ) error { 730 resp, err := e.ec2.Instances(nil, filter) 731 if err != nil { 732 return err 733 } 734 n := 0 735 // For each requested id, add it to the returned instances 736 // if we find it in the response. 737 for i, id := range ids { 738 if insts[i] != nil { 739 n++ 740 continue 741 } 742 for j := range resp.Reservations { 743 r := &resp.Reservations[j] 744 for k := range r.Instances { 745 if r.Instances[k].InstanceId != string(id) { 746 continue 747 } 748 inst := r.Instances[k] 749 // TODO(wallyworld): lookup the details to fill in the instance type data 750 insts[i] = &ec2Instance{e: e, Instance: &inst} 751 n++ 752 } 753 } 754 } 755 if n < len(ids) { 756 return environs.ErrPartialInstances 757 } 758 return nil 759 } 760 761 // NetworkInterfaces implements NetworkingEnviron.NetworkInterfaces. 762 func (e *environ) NetworkInterfaces(instId instance.Id) ([]network.InterfaceInfo, error) { 763 var err error 764 var networkInterfacesResp *ec2.NetworkInterfacesResp 765 for a := shortAttempt.Start(); a.Next(); { 766 logger.Tracef("retrieving NICs for instance %q", instId) 767 filter := ec2.NewFilter() 768 filter.Add("attachment.instance-id", string(instId)) 769 networkInterfacesResp, err = e.ec2.NetworkInterfaces(nil, filter) 770 logger.Tracef("instance %q NICs: %#v (err: %v)", instId, networkInterfacesResp, err) 771 if err != nil { 772 logger.Errorf("failed to get instance %q interfaces: %v (retrying)", instId, err) 773 continue 774 } 775 if len(networkInterfacesResp.Interfaces) == 0 { 776 logger.Tracef("instance %q has no NIC attachment yet, retrying...", instId) 777 continue 778 } 779 logger.Tracef("found instance %q NICS: %#v", instId, networkInterfacesResp.Interfaces) 780 break 781 } 782 if err != nil { 783 // either the instance doesn't exist or we couldn't get through to 784 // the ec2 api 785 return nil, errors.Annotatef(err, "cannot get instance %q network interfaces", instId) 786 } 787 ec2Interfaces := networkInterfacesResp.Interfaces 788 result := make([]network.InterfaceInfo, len(ec2Interfaces)) 789 for i, iface := range ec2Interfaces { 790 resp, err := e.ec2.Subnets([]string{iface.SubnetId}, nil) 791 if err != nil { 792 return nil, errors.Annotatef(err, "failed to retrieve subnet %q info", iface.SubnetId) 793 } 794 if len(resp.Subnets) != 1 { 795 return nil, errors.Errorf("expected 1 subnet, got %d", len(resp.Subnets)) 796 } 797 subnet := resp.Subnets[0] 798 cidr := subnet.CIDRBlock 799 800 result[i] = network.InterfaceInfo{ 801 DeviceIndex: iface.Attachment.DeviceIndex, 802 MACAddress: iface.MACAddress, 803 CIDR: cidr, 804 ProviderId: network.Id(iface.Id), 805 ProviderSubnetId: network.Id(iface.SubnetId), 806 AvailabilityZones: []string{subnet.AvailZone}, 807 VLANTag: 0, // Not supported on EC2. 808 // Getting the interface name is not supported on EC2, so fake it. 809 InterfaceName: fmt.Sprintf("unsupported%d", iface.Attachment.DeviceIndex), 810 Disabled: false, 811 NoAutoStart: false, 812 ConfigType: network.ConfigDHCP, 813 InterfaceType: network.EthernetInterface, 814 Address: network.NewScopedAddress(iface.PrivateIPAddress, network.ScopeCloudLocal), 815 } 816 } 817 return result, nil 818 } 819 820 func makeSubnetInfo(cidr string, subnetId network.Id, availZones []string) (network.SubnetInfo, error) { 821 _, _, err := net.ParseCIDR(cidr) 822 if err != nil { 823 return network.SubnetInfo{}, errors.Annotatef(err, "skipping subnet %q, invalid CIDR", cidr) 824 } 825 826 info := network.SubnetInfo{ 827 CIDR: cidr, 828 ProviderId: subnetId, 829 VLANTag: 0, // Not supported on EC2 830 AvailabilityZones: availZones, 831 } 832 logger.Tracef("found subnet with info %#v", info) 833 return info, nil 834 835 } 836 837 // Spaces is not implemented by the ec2 provider as we don't currently have 838 // provider level spaces. 839 func (e *environ) Spaces() ([]network.SpaceInfo, error) { 840 return nil, errors.NotSupportedf("Spaces") 841 } 842 843 // Subnets returns basic information about the specified subnets known 844 // by the provider for the specified instance or list of ids. subnetIds can be 845 // empty, in which case all known are returned. Implements 846 // NetworkingEnviron.Subnets. 847 func (e *environ) Subnets(instId instance.Id, subnetIds []network.Id) ([]network.SubnetInfo, error) { 848 var results []network.SubnetInfo 849 subIdSet := make(map[string]bool) 850 for _, subId := range subnetIds { 851 subIdSet[string(subId)] = false 852 } 853 854 if instId != instance.UnknownId { 855 interfaces, err := e.NetworkInterfaces(instId) 856 if err != nil { 857 return results, errors.Trace(err) 858 } 859 if len(subnetIds) == 0 { 860 for _, iface := range interfaces { 861 subIdSet[string(iface.ProviderSubnetId)] = false 862 } 863 } 864 for _, iface := range interfaces { 865 _, ok := subIdSet[string(iface.ProviderSubnetId)] 866 if !ok { 867 logger.Tracef("subnet %q not in %v, skipping", iface.ProviderSubnetId, subnetIds) 868 continue 869 } 870 subIdSet[string(iface.ProviderSubnetId)] = true 871 info, err := makeSubnetInfo(iface.CIDR, iface.ProviderSubnetId, iface.AvailabilityZones) 872 if err != nil { 873 // Error will already have been logged. 874 continue 875 } 876 results = append(results, info) 877 } 878 } else { 879 resp, err := e.ec2.Subnets(nil, nil) 880 if err != nil { 881 return nil, errors.Annotatef(err, "failed to retrieve subnets") 882 } 883 if len(subnetIds) == 0 { 884 for _, subnet := range resp.Subnets { 885 subIdSet[subnet.Id] = false 886 } 887 } 888 889 for _, subnet := range resp.Subnets { 890 _, ok := subIdSet[subnet.Id] 891 if !ok { 892 logger.Tracef("subnet %q not in %v, skipping", subnet.Id, subnetIds) 893 continue 894 } 895 subIdSet[subnet.Id] = true 896 cidr := subnet.CIDRBlock 897 info, err := makeSubnetInfo(cidr, network.Id(subnet.Id), []string{subnet.AvailZone}) 898 if err != nil { 899 // Error will already have been logged. 900 continue 901 } 902 results = append(results, info) 903 904 } 905 } 906 907 notFound := []string{} 908 for subId, found := range subIdSet { 909 if !found { 910 notFound = append(notFound, subId) 911 } 912 } 913 if len(notFound) != 0 { 914 return nil, errors.Errorf("failed to find the following subnet ids: %v", notFound) 915 } 916 917 return results, nil 918 } 919 920 // AllInstances is part of the environs.InstanceBroker interface. 921 func (e *environ) AllInstances() ([]instance.Instance, error) { 922 return e.AllInstancesByState("pending", "running") 923 } 924 925 // AllInstancesByState returns all instances in the environment 926 // with one of the specified instance states. 927 func (e *environ) AllInstancesByState(states ...string) ([]instance.Instance, error) { 928 // NOTE(axw) we use security group filtering here because instances 929 // start out untagged. If Juju were to abort after starting an instance, 930 // but before tagging it, it would be leaked. We only need to do this 931 // for AllInstances, as it is the result of AllInstances that is used 932 // in "harvesting" unknown instances by the provisioner. 933 // 934 // One possible alternative is to modify ec2.RunInstances to allow the 935 // caller to specify ClientToken, and then format it like 936 // <controller-uuid>:<model-uuid>:<machine-id> 937 // (with base64-encoding to keep the size under the 64-byte limit) 938 // 939 // It is possible to filter on "client-token", and specify wildcards; 940 // therefore we could use client-token filters everywhere in the ec2 941 // provider instead of tags or security groups. The only danger is if 942 // we need to make non-idempotent calls to RunInstances for the machine 943 // ID. I don't think this is needed, but I am not confident enough to 944 // change this fundamental right now. 945 // 946 // An EC2 API call is required to resolve the group name to an id, as 947 // VPC enabled accounts do not support name based filtering. 948 groupName := e.jujuGroupName() 949 group, err := e.groupByName(groupName) 950 if isNotFoundError(err) { 951 // If there's no group, then there cannot be any instances. 952 return nil, nil 953 } else if err != nil { 954 return nil, errors.Trace(err) 955 } 956 filter := ec2.NewFilter() 957 filter.Add("instance-state-name", states...) 958 filter.Add("instance.group-id", group.Id) 959 return e.allInstances(filter) 960 } 961 962 // ControllerInstances is part of the environs.Environ interface. 963 func (e *environ) ControllerInstances(controllerUUID string) ([]instance.Id, error) { 964 filter := ec2.NewFilter() 965 filter.Add("instance-state-name", aliveInstanceStates...) 966 filter.Add(fmt.Sprintf("tag:%s", tags.JujuIsController), "true") 967 e.addControllerFilter(filter, controllerUUID) 968 ids, err := e.allInstanceIDs(filter) 969 if err != nil { 970 return nil, errors.Trace(err) 971 } 972 if len(ids) == 0 { 973 return nil, environs.ErrNotBootstrapped 974 } 975 return ids, nil 976 } 977 978 // allControllerManagedInstances returns the IDs of all instances managed by 979 // this environment's controller. 980 // 981 // Note that this requires that all instances are tagged; we cannot filter on 982 // security groups, as we do not know the names of the models. 983 func (e *environ) allControllerManagedInstances(controllerUUID string) ([]instance.Id, error) { 984 filter := ec2.NewFilter() 985 filter.Add("instance-state-name", aliveInstanceStates...) 986 e.addControllerFilter(filter, controllerUUID) 987 return e.allInstanceIDs(filter) 988 } 989 990 func (e *environ) allInstanceIDs(filter *ec2.Filter) ([]instance.Id, error) { 991 insts, err := e.allInstances(filter) 992 if err != nil { 993 return nil, errors.Trace(err) 994 } 995 ids := make([]instance.Id, len(insts)) 996 for i, inst := range insts { 997 ids[i] = inst.Id() 998 } 999 return ids, nil 1000 } 1001 1002 func (e *environ) allInstances(filter *ec2.Filter) ([]instance.Instance, error) { 1003 resp, err := e.ec2.Instances(nil, filter) 1004 if err != nil { 1005 return nil, errors.Annotate(err, "listing instances") 1006 } 1007 var insts []instance.Instance 1008 for _, r := range resp.Reservations { 1009 for i := range r.Instances { 1010 inst := r.Instances[i] 1011 // TODO(wallyworld): lookup the details to fill in the instance type data 1012 insts = append(insts, &ec2Instance{e: e, Instance: &inst}) 1013 } 1014 } 1015 return insts, nil 1016 } 1017 1018 // Destroy is part of the environs.Environ interface. 1019 func (e *environ) Destroy() error { 1020 if err := common.Destroy(e); err != nil { 1021 return errors.Trace(err) 1022 } 1023 if err := e.cleanEnvironmentSecurityGroups(); err != nil { 1024 return errors.Annotate(err, "cannot delete environment security groups") 1025 } 1026 return nil 1027 } 1028 1029 // DestroyController implements the Environ interface. 1030 func (e *environ) DestroyController(controllerUUID string) error { 1031 // In case any hosted environment hasn't been cleaned up yet, 1032 // we also attempt to delete their resources when the controller 1033 // environment is destroyed. 1034 if err := e.destroyControllerManagedEnvirons(controllerUUID); err != nil { 1035 return errors.Annotate(err, "destroying managed environs") 1036 } 1037 return e.Destroy() 1038 } 1039 1040 // destroyControllerManagedEnvirons destroys all environments managed by this 1041 // environment's controller. 1042 func (e *environ) destroyControllerManagedEnvirons(controllerUUID string) error { 1043 1044 // Terminate all instances managed by the controller. 1045 instIds, err := e.allControllerManagedInstances(controllerUUID) 1046 if err != nil { 1047 return errors.Annotate(err, "listing instances") 1048 } 1049 if err := e.terminateInstances(instIds); err != nil { 1050 return errors.Annotate(err, "terminating instances") 1051 } 1052 1053 // Delete all volumes managed by the controller. 1054 volIds, err := e.allControllerManagedVolumes(controllerUUID) 1055 if err != nil { 1056 return errors.Annotate(err, "listing volumes") 1057 } 1058 errs := destroyVolumes(e.ec2, volIds) 1059 for i, err := range errs { 1060 if err == nil { 1061 continue 1062 } 1063 return errors.Annotatef(err, "destroying volume %q", volIds[i], err) 1064 } 1065 1066 // Delete security groups managed by the controller. 1067 groups, err := e.controllerSecurityGroups(controllerUUID) 1068 if err != nil { 1069 return errors.Trace(err) 1070 } 1071 for _, g := range groups { 1072 if err := deleteSecurityGroupInsistently(e.ec2, g, clock.WallClock); err != nil { 1073 return errors.Annotatef( 1074 err, "cannot delete security group %q (%q)", 1075 g.Name, g.Id, 1076 ) 1077 } 1078 } 1079 return nil 1080 } 1081 1082 func (e *environ) allControllerManagedVolumes(controllerUUID string) ([]string, error) { 1083 filter := ec2.NewFilter() 1084 e.addControllerFilter(filter, controllerUUID) 1085 return listVolumes(e.ec2, filter) 1086 } 1087 1088 func portsToIPPerms(ports []network.PortRange) []ec2.IPPerm { 1089 ipPerms := make([]ec2.IPPerm, len(ports)) 1090 for i, p := range ports { 1091 ipPerms[i] = ec2.IPPerm{ 1092 Protocol: p.Protocol, 1093 FromPort: p.FromPort, 1094 ToPort: p.ToPort, 1095 SourceIPs: []string{"0.0.0.0/0"}, 1096 } 1097 } 1098 return ipPerms 1099 } 1100 1101 func (e *environ) openPortsInGroup(name string, ports []network.PortRange) error { 1102 if len(ports) == 0 { 1103 return nil 1104 } 1105 // Give permissions for anyone to access the given ports. 1106 g, err := e.groupByName(name) 1107 if err != nil { 1108 return err 1109 } 1110 ipPerms := portsToIPPerms(ports) 1111 _, err = e.ec2.AuthorizeSecurityGroup(g, ipPerms) 1112 if err != nil && ec2ErrCode(err) == "InvalidPermission.Duplicate" { 1113 if len(ports) == 1 { 1114 return nil 1115 } 1116 // If there's more than one port and we get a duplicate error, 1117 // then we go through authorizing each port individually, 1118 // otherwise the ports that were *not* duplicates will have 1119 // been ignored 1120 for i := range ipPerms { 1121 _, err := e.ec2.AuthorizeSecurityGroup(g, ipPerms[i:i+1]) 1122 if err != nil && ec2ErrCode(err) != "InvalidPermission.Duplicate" { 1123 return fmt.Errorf("cannot open port %v: %v", ipPerms[i], err) 1124 } 1125 } 1126 return nil 1127 } 1128 if err != nil { 1129 return fmt.Errorf("cannot open ports: %v", err) 1130 } 1131 return nil 1132 } 1133 1134 func (e *environ) closePortsInGroup(name string, ports []network.PortRange) error { 1135 if len(ports) == 0 { 1136 return nil 1137 } 1138 // Revoke permissions for anyone to access the given ports. 1139 // Note that ec2 allows the revocation of permissions that aren't 1140 // granted, so this is naturally idempotent. 1141 g, err := e.groupByName(name) 1142 if err != nil { 1143 return err 1144 } 1145 _, err = e.ec2.RevokeSecurityGroup(g, portsToIPPerms(ports)) 1146 if err != nil { 1147 return fmt.Errorf("cannot close ports: %v", err) 1148 } 1149 return nil 1150 } 1151 1152 func (e *environ) portsInGroup(name string) (ports []network.PortRange, err error) { 1153 group, err := e.groupInfoByName(name) 1154 if err != nil { 1155 return nil, err 1156 } 1157 for _, p := range group.IPPerms { 1158 if len(p.SourceIPs) != 1 { 1159 logger.Errorf("expected exactly one IP permission, found: %v", p) 1160 continue 1161 } 1162 ports = append(ports, network.PortRange{ 1163 Protocol: p.Protocol, 1164 FromPort: p.FromPort, 1165 ToPort: p.ToPort, 1166 }) 1167 } 1168 network.SortPortRanges(ports) 1169 return ports, nil 1170 } 1171 1172 func (e *environ) OpenPorts(ports []network.PortRange) error { 1173 if e.Config().FirewallMode() != config.FwGlobal { 1174 return errors.Errorf("invalid firewall mode %q for opening ports on model", e.Config().FirewallMode()) 1175 } 1176 if err := e.openPortsInGroup(e.globalGroupName(), ports); err != nil { 1177 return errors.Trace(err) 1178 } 1179 logger.Infof("opened ports in global group: %v", ports) 1180 return nil 1181 } 1182 1183 func (e *environ) ClosePorts(ports []network.PortRange) error { 1184 if e.Config().FirewallMode() != config.FwGlobal { 1185 return errors.Errorf("invalid firewall mode %q for closing ports on model", e.Config().FirewallMode()) 1186 } 1187 if err := e.closePortsInGroup(e.globalGroupName(), ports); err != nil { 1188 return errors.Trace(err) 1189 } 1190 logger.Infof("closed ports in global group: %v", ports) 1191 return nil 1192 } 1193 1194 func (e *environ) Ports() ([]network.PortRange, error) { 1195 if e.Config().FirewallMode() != config.FwGlobal { 1196 return nil, errors.Errorf("invalid firewall mode %q for retrieving ports from model", e.Config().FirewallMode()) 1197 } 1198 return e.portsInGroup(e.globalGroupName()) 1199 } 1200 1201 func (*environ) Provider() environs.EnvironProvider { 1202 return &providerInstance 1203 } 1204 1205 func (e *environ) instanceSecurityGroups(instIDs []instance.Id, states ...string) ([]ec2.SecurityGroup, error) { 1206 strInstID := make([]string, len(instIDs)) 1207 for i := range instIDs { 1208 strInstID[i] = string(instIDs[i]) 1209 } 1210 1211 filter := ec2.NewFilter() 1212 if len(states) > 0 { 1213 filter.Add("instance-state-name", states...) 1214 } 1215 1216 resp, err := e.ec2.Instances(strInstID, filter) 1217 if err != nil { 1218 return nil, errors.Annotatef(err, "cannot retrieve instance information from aws to delete security groups") 1219 } 1220 1221 securityGroups := []ec2.SecurityGroup{} 1222 for _, res := range resp.Reservations { 1223 for _, inst := range res.Instances { 1224 logger.Debugf("instance %q has security groups %+v", inst.InstanceId, inst.SecurityGroups) 1225 securityGroups = append(securityGroups, inst.SecurityGroups...) 1226 } 1227 } 1228 return securityGroups, nil 1229 } 1230 1231 // controllerSecurityGroups returns the details of all security groups managed 1232 // by the environment's controller. 1233 func (e *environ) controllerSecurityGroups(controllerUUID string) ([]ec2.SecurityGroup, error) { 1234 filter := ec2.NewFilter() 1235 e.addControllerFilter(filter, controllerUUID) 1236 resp, err := e.ec2.SecurityGroups(nil, filter) 1237 if err != nil { 1238 return nil, errors.Annotate(err, "listing security groups") 1239 } 1240 groups := make([]ec2.SecurityGroup, len(resp.Groups)) 1241 for i, info := range resp.Groups { 1242 groups[i] = ec2.SecurityGroup{Id: info.Id, Name: info.Name} 1243 } 1244 return groups, nil 1245 } 1246 1247 // cleanEnvironmentSecurityGroups attempts to delete all security groups owned 1248 // by the environment. 1249 func (e *environ) cleanEnvironmentSecurityGroups() error { 1250 jujuGroup := e.jujuGroupName() 1251 g, err := e.groupByName(jujuGroup) 1252 if isNotFoundError(err) { 1253 return nil 1254 } 1255 if err != nil { 1256 return errors.Annotatef(err, "cannot retrieve default security group: %q", jujuGroup) 1257 } 1258 if err := deleteSecurityGroupInsistently(e.ec2, g, clock.WallClock); err != nil { 1259 return errors.Annotate(err, "cannot delete default security group") 1260 } 1261 return nil 1262 } 1263 1264 func (e *environ) terminateInstances(ids []instance.Id) error { 1265 if len(ids) == 0 { 1266 return nil 1267 } 1268 1269 // TODO (anastasiamac 2016-04-11) Err if instances still have resources hanging around. 1270 // LP#1568654 1271 defer func() { 1272 e.deleteSecurityGroupsForInstances(ids) 1273 }() 1274 1275 // TODO (anastasiamac 2016-04-7) instance termination would benefit 1276 // from retry with exponential delay just like security groups 1277 // in defer. Bug#1567179. 1278 var err error 1279 for a := shortAttempt.Start(); a.Next(); { 1280 _, err = terminateInstancesById(e.ec2, ids...) 1281 if err == nil || ec2ErrCode(err) != "InvalidInstanceID.NotFound" { 1282 // This will return either success at terminating all instances (1st condition) or 1283 // encountered error as long as it's not NotFound (2nd condition). 1284 return err 1285 } 1286 } 1287 1288 // We will get here only if we got a NotFound error. 1289 // 1. If we attempted to terminate only one instance was, return now. 1290 if len(ids) == 1 { 1291 ids = nil 1292 return nil 1293 } 1294 // 2. If we attempted to terminate several instances and got a NotFound error, 1295 // it means that no instances were terminated. 1296 // So try each instance individually, ignoring a NotFound error this time. 1297 deletedIDs := []instance.Id{} 1298 for _, id := range ids { 1299 _, err = terminateInstancesById(e.ec2, id) 1300 if err == nil { 1301 deletedIDs = append(deletedIDs, id) 1302 } 1303 if err != nil && ec2ErrCode(err) != "InvalidInstanceID.NotFound" { 1304 ids = deletedIDs 1305 return err 1306 } 1307 } 1308 // We will get here if all of the instances are deleted successfully, 1309 // or are not found, which implies they were previously deleted. 1310 ids = deletedIDs 1311 return nil 1312 } 1313 1314 var terminateInstancesById = func(ec2inst *ec2.EC2, ids ...instance.Id) (*ec2.TerminateInstancesResp, error) { 1315 strs := make([]string, len(ids)) 1316 for i, id := range ids { 1317 strs[i] = string(id) 1318 } 1319 return ec2inst.TerminateInstances(strs) 1320 } 1321 1322 func (e *environ) deleteSecurityGroupsForInstances(ids []instance.Id) { 1323 if len(ids) == 0 { 1324 logger.Debugf("no need to delete security groups: no intances were terminated successfully") 1325 return 1326 } 1327 1328 // We only want to attempt deleting security groups for the 1329 // instances that have been successfully terminated. 1330 securityGroups, err := e.instanceSecurityGroups(ids, "shutting-down", "terminated") 1331 if err != nil { 1332 logger.Errorf("cannot determine security groups to delete: %v", err) 1333 return 1334 } 1335 1336 // TODO(perrito666) we need to tag global security groups to be able 1337 // to tell them apart from future groups that are neither machine 1338 // nor environment group. 1339 // https://bugs.launchpad.net/juju-core/+bug/1534289 1340 jujuGroup := e.jujuGroupName() 1341 1342 for _, deletable := range securityGroups { 1343 if deletable.Name == jujuGroup { 1344 continue 1345 } 1346 if err := deleteSecurityGroupInsistently(e.ec2, deletable, clock.WallClock); err != nil { 1347 // In ideal world, we would err out here. 1348 // However: 1349 // 1. We do not know if all instances have been terminated. 1350 // If some instances erred out, they may still be using this security group. 1351 // In this case, our failure to delete security group is reasonable: it's still in use. 1352 // 2. Some security groups may be shared by multiple instances, 1353 // for example, global firewalling. We should not delete these. 1354 logger.Errorf("provider failure: %v", err) 1355 } 1356 } 1357 } 1358 1359 // SecurityGroupCleaner defines provider instance methods needed to delete 1360 // a security group. 1361 type SecurityGroupCleaner interface { 1362 1363 // DeleteSecurityGroup deletes security group on the provider. 1364 DeleteSecurityGroup(group ec2.SecurityGroup) (resp *ec2.SimpleResp, err error) 1365 } 1366 1367 var deleteSecurityGroupInsistently = func(inst SecurityGroupCleaner, group ec2.SecurityGroup, clock clock.Clock) error { 1368 err := retry.Call(retry.CallArgs{ 1369 Attempts: 30, 1370 Delay: time.Second, 1371 MaxDelay: time.Minute, // because 2**29 seconds is beyond reasonable 1372 BackoffFunc: retry.DoubleDelay, 1373 Clock: clock, 1374 Func: func() error { 1375 _, err := inst.DeleteSecurityGroup(group) 1376 if err == nil || isNotFoundError(err) { 1377 logger.Debugf("deleting security group %q", group.Name) 1378 return nil 1379 } 1380 return errors.Trace(err) 1381 }, 1382 NotifyFunc: func(err error, attempt int) { 1383 logger.Debugf("deleting security group %q, attempt %d", group.Name, attempt) 1384 }, 1385 }) 1386 if err != nil { 1387 return errors.Annotatef(err, "cannot delete security group %q: consider deleting it manually", group.Name) 1388 } 1389 return nil 1390 } 1391 1392 func (e *environ) addModelFilter(f *ec2.Filter) { 1393 f.Add(fmt.Sprintf("tag:%s", tags.JujuModel), e.uuid()) 1394 } 1395 1396 func (e *environ) addControllerFilter(f *ec2.Filter, controllerUUID string) { 1397 f.Add(fmt.Sprintf("tag:%s", tags.JujuController), controllerUUID) 1398 } 1399 1400 func (e *environ) uuid() string { 1401 return e.Config().UUID() 1402 } 1403 1404 func (e *environ) globalGroupName() string { 1405 return fmt.Sprintf("%s-global", e.jujuGroupName()) 1406 } 1407 1408 func (e *environ) machineGroupName(machineId string) string { 1409 return fmt.Sprintf("%s-%s", e.jujuGroupName(), machineId) 1410 } 1411 1412 func (e *environ) jujuGroupName() string { 1413 return "juju-" + e.uuid() 1414 } 1415 1416 // setUpGroups creates the security groups for the new machine, and 1417 // returns them. 1418 // 1419 // Instances are tagged with a group so they can be distinguished from 1420 // other instances that might be running on the same EC2 account. In 1421 // addition, a specific machine security group is created for each 1422 // machine, so that its firewall rules can be configured per machine. 1423 func (e *environ) setUpGroups(controllerUUID, machineId string, apiPort int) ([]ec2.SecurityGroup, error) { 1424 1425 // Ensure there's a global group for Juju-related traffic. 1426 jujuGroup, err := e.ensureGroup(controllerUUID, e.jujuGroupName(), 1427 []ec2.IPPerm{{ 1428 Protocol: "tcp", 1429 FromPort: 22, 1430 ToPort: 22, 1431 SourceIPs: []string{"0.0.0.0/0"}, 1432 }, { 1433 Protocol: "tcp", 1434 FromPort: apiPort, 1435 ToPort: apiPort, 1436 SourceIPs: []string{"0.0.0.0/0"}, 1437 }, { 1438 Protocol: "tcp", 1439 FromPort: 0, 1440 ToPort: 65535, 1441 }, { 1442 Protocol: "udp", 1443 FromPort: 0, 1444 ToPort: 65535, 1445 }, { 1446 Protocol: "icmp", 1447 FromPort: -1, 1448 ToPort: -1, 1449 }}, 1450 ) 1451 if err != nil { 1452 return nil, err 1453 } 1454 1455 var machineGroup ec2.SecurityGroup 1456 switch e.Config().FirewallMode() { 1457 case config.FwInstance: 1458 machineGroup, err = e.ensureGroup(controllerUUID, e.machineGroupName(machineId), nil) 1459 case config.FwGlobal: 1460 machineGroup, err = e.ensureGroup(controllerUUID, e.globalGroupName(), nil) 1461 } 1462 if err != nil { 1463 return nil, err 1464 } 1465 return []ec2.SecurityGroup{jujuGroup, machineGroup}, nil 1466 } 1467 1468 // zeroGroup holds the zero security group. 1469 var zeroGroup ec2.SecurityGroup 1470 1471 // securityGroupsByNameOrID calls ec2.SecurityGroups() either with the given 1472 // groupName or with filter by vpc-id and group-name, depending on whether 1473 // vpc-id is empty or not. 1474 func (e *environ) securityGroupsByNameOrID(groupName string) (*ec2.SecurityGroupsResp, error) { 1475 if chosenVPCID := e.ecfg().vpcID(); isVPCIDSet(chosenVPCID) { 1476 // AWS VPC API requires both of these filters (and no 1477 // group names/ids set) for non-default EC2-VPC groups: 1478 filter := ec2.NewFilter() 1479 filter.Add("vpc-id", chosenVPCID) 1480 filter.Add("group-name", groupName) 1481 return e.ec2.SecurityGroups(nil, filter) 1482 } 1483 1484 // EC2-Classic or EC2-VPC with implicit default VPC need to use the 1485 // GroupName.X arguments instead of the filters. 1486 groups := ec2.SecurityGroupNames(groupName) 1487 return e.ec2.SecurityGroups(groups, nil) 1488 } 1489 1490 // ensureGroup returns the security group with name and perms. 1491 // If a group with name does not exist, one will be created. 1492 // If it exists, its permissions are set to perms. 1493 // Any entries in perms without SourceIPs will be granted for 1494 // the named group only. 1495 func (e *environ) ensureGroup(controllerUUID, name string, perms []ec2.IPPerm) (g ec2.SecurityGroup, err error) { 1496 // Specify explicit VPC ID if needed (not for default VPC or EC2-classic). 1497 chosenVPCID := e.ecfg().vpcID() 1498 inVPCLogSuffix := fmt.Sprintf(" (in VPC %q)", chosenVPCID) 1499 if !isVPCIDSet(chosenVPCID) { 1500 chosenVPCID = "" 1501 inVPCLogSuffix = "" 1502 } 1503 1504 resp, err := e.ec2.CreateSecurityGroup(chosenVPCID, name, "juju group") 1505 if err != nil && ec2ErrCode(err) != "InvalidGroup.Duplicate" { 1506 err = errors.Annotatef(err, "creating security group %q%s", name, inVPCLogSuffix) 1507 return zeroGroup, err 1508 } 1509 1510 var have permSet 1511 if err == nil { 1512 g = resp.SecurityGroup 1513 // Tag the created group with the model and controller UUIDs. 1514 cfg := e.Config() 1515 tags := tags.ResourceTags( 1516 names.NewModelTag(cfg.UUID()), 1517 names.NewControllerTag(controllerUUID), 1518 cfg, 1519 ) 1520 if err := tagResources(e.ec2, tags, g.Id); err != nil { 1521 return g, errors.Annotate(err, "tagging security group") 1522 } 1523 logger.Debugf("created security group %q with ID %q%s", name, g.Id, inVPCLogSuffix) 1524 } else { 1525 resp, err := e.securityGroupsByNameOrID(name) 1526 if err != nil { 1527 err = errors.Annotatef(err, "fetching security group %q%s", name, inVPCLogSuffix) 1528 return zeroGroup, err 1529 } 1530 if len(resp.Groups) == 0 { 1531 return zeroGroup, errors.NotFoundf("security group %q%s", name, inVPCLogSuffix) 1532 } 1533 info := resp.Groups[0] 1534 // It's possible that the old group has the wrong 1535 // description here, but if it does it's probably due 1536 // to something deliberately playing games with juju, 1537 // so we ignore it. 1538 g = info.SecurityGroup 1539 have = newPermSetForGroup(info.IPPerms, g) 1540 } 1541 1542 want := newPermSetForGroup(perms, g) 1543 revoke := make(permSet) 1544 for p := range have { 1545 if !want[p] { 1546 revoke[p] = true 1547 } 1548 } 1549 if len(revoke) > 0 { 1550 _, err := e.ec2.RevokeSecurityGroup(g, revoke.ipPerms()) 1551 if err != nil { 1552 err = errors.Annotatef(err, "revoking security group %q%s", g.Id, inVPCLogSuffix) 1553 return zeroGroup, err 1554 } 1555 } 1556 1557 add := make(permSet) 1558 for p := range want { 1559 if !have[p] { 1560 add[p] = true 1561 } 1562 } 1563 if len(add) > 0 { 1564 _, err := e.ec2.AuthorizeSecurityGroup(g, add.ipPerms()) 1565 if err != nil { 1566 err = errors.Annotatef(err, "authorizing security group %q%s", g.Id, inVPCLogSuffix) 1567 return zeroGroup, err 1568 } 1569 } 1570 return g, nil 1571 } 1572 1573 // permKey represents a permission for a group or an ip address range to access 1574 // the given range of ports. Only one of groupId or ipAddr should be non-empty. 1575 type permKey struct { 1576 protocol string 1577 fromPort int 1578 toPort int 1579 groupId string 1580 ipAddr string 1581 } 1582 1583 type permSet map[permKey]bool 1584 1585 // newPermSetForGroup returns a set of all the permissions in the 1586 // given slice of IPPerms. It ignores the name and owner 1587 // id in source groups, and any entry with no source ips will 1588 // be granted for the given group only. 1589 func newPermSetForGroup(ps []ec2.IPPerm, group ec2.SecurityGroup) permSet { 1590 m := make(permSet) 1591 for _, p := range ps { 1592 k := permKey{ 1593 protocol: p.Protocol, 1594 fromPort: p.FromPort, 1595 toPort: p.ToPort, 1596 } 1597 if len(p.SourceIPs) > 0 { 1598 for _, ip := range p.SourceIPs { 1599 k.ipAddr = ip 1600 m[k] = true 1601 } 1602 } else { 1603 k.groupId = group.Id 1604 m[k] = true 1605 } 1606 } 1607 return m 1608 } 1609 1610 // ipPerms returns m as a slice of permissions usable 1611 // with the ec2 package. 1612 func (m permSet) ipPerms() (ps []ec2.IPPerm) { 1613 // We could compact the permissions, but it 1614 // hardly seems worth it. 1615 for p := range m { 1616 ipp := ec2.IPPerm{ 1617 Protocol: p.protocol, 1618 FromPort: p.fromPort, 1619 ToPort: p.toPort, 1620 } 1621 if p.ipAddr != "" { 1622 ipp.SourceIPs = []string{p.ipAddr} 1623 } else { 1624 ipp.SourceGroups = []ec2.UserSecurityGroup{{Id: p.groupId}} 1625 } 1626 ps = append(ps, ipp) 1627 } 1628 return 1629 } 1630 1631 func isZoneOrSubnetConstrainedError(err error) bool { 1632 return isZoneConstrainedError(err) || isSubnetConstrainedError(err) 1633 } 1634 1635 // isZoneConstrainedError reports whether or not the error indicates 1636 // RunInstances failed due to the specified availability zone being 1637 // constrained for the instance type being provisioned, or is 1638 // otherwise unusable for the specific request made. 1639 func isZoneConstrainedError(err error) bool { 1640 switch err := err.(type) { 1641 case *ec2.Error: 1642 switch err.Code { 1643 case "Unsupported", "InsufficientInstanceCapacity": 1644 // A big hammer, but we've now seen several different error messages 1645 // for constrained zones, and who knows how many more there might 1646 // be. If the message contains "Availability Zone", it's a fair 1647 // bet that it's constrained or otherwise unusable. 1648 return strings.Contains(err.Message, "Availability Zone") 1649 case "InvalidInput": 1650 // If the region has a default VPC, then we will receive an error 1651 // if the AZ does not have a default subnet. Until we have proper 1652 // support for networks, we'll skip over these. 1653 return strings.HasPrefix(err.Message, "No default subnet for availability zone") 1654 case "VolumeTypeNotAvailableInZone": 1655 return true 1656 } 1657 } 1658 return false 1659 } 1660 1661 // isSubnetConstrainedError reports whether or not the error indicates 1662 // RunInstances failed due to the specified VPC subnet ID being constrained for 1663 // the instance type being provisioned, or is otherwise unusable for the 1664 // specific request made. 1665 func isSubnetConstrainedError(err error) bool { 1666 switch err := err.(type) { 1667 case *ec2.Error: 1668 switch err.Code { 1669 case "InsufficientFreeAddressesInSubnet", "InsufficientInstanceCapacity": 1670 // Subnet and/or VPC general limits reached. 1671 return true 1672 case "InvalidSubnetID.NotFound": 1673 // This shouldn't happen, as we validate the subnet IDs, but it can 1674 // happen if the user manually deleted the subnet outside of Juju. 1675 return true 1676 } 1677 } 1678 return false 1679 } 1680 1681 // If the err is of type *ec2.Error, ec2ErrCode returns 1682 // its code, otherwise it returns the empty string. 1683 func ec2ErrCode(err error) string { 1684 ec2err, _ := errors.Cause(err).(*ec2.Error) 1685 if ec2err == nil { 1686 return "" 1687 } 1688 return ec2err.Code 1689 } 1690 1691 func (e *environ) AllocateContainerAddresses(hostInstanceID instance.Id, containerTag names.MachineTag, preparedInfo []network.InterfaceInfo) ([]network.InterfaceInfo, error) { 1692 return nil, errors.NotSupportedf("container address allocation") 1693 } 1694 1695 func (e *environ) ReleaseContainerAddresses(interfaces []network.ProviderInterfaceInfo) error { 1696 return errors.NotSupportedf("container address allocation") 1697 }