github.com/mwhudson/juju@v0.0.0-20160512215208-90ff01f3497f/provider/ec2/environ.go (about) 1 // Copyright 2011-2014 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package ec2 5 6 import ( 7 "fmt" 8 "math/rand" 9 "net" 10 "strings" 11 "sync" 12 "time" 13 14 "github.com/juju/errors" 15 "github.com/juju/names" 16 "github.com/juju/retry" 17 "github.com/juju/utils" 18 "github.com/juju/utils/arch" 19 "github.com/juju/utils/clock" 20 "gopkg.in/amz.v3/aws" 21 "gopkg.in/amz.v3/ec2" 22 "gopkg.in/amz.v3/s3" 23 24 "github.com/juju/juju/cloudconfig/instancecfg" 25 "github.com/juju/juju/cloudconfig/providerinit" 26 "github.com/juju/juju/constraints" 27 "github.com/juju/juju/environs" 28 "github.com/juju/juju/environs/config" 29 "github.com/juju/juju/environs/imagemetadata" 30 "github.com/juju/juju/environs/instances" 31 "github.com/juju/juju/environs/simplestreams" 32 "github.com/juju/juju/environs/tags" 33 "github.com/juju/juju/instance" 34 "github.com/juju/juju/network" 35 "github.com/juju/juju/provider" 36 "github.com/juju/juju/provider/common" 37 "github.com/juju/juju/tools" 38 ) 39 40 const ( 41 invalidParameterValue = "InvalidParameterValue" 42 privateAddressLimitExceeded = "PrivateIpAddressLimitExceeded" 43 44 // tagName is the AWS-specific tag key that populates resources' 45 // name columns in the console. 46 tagName = "Name" 47 ) 48 49 var ( 50 // Use shortAttempt to poll for short-term events or for retrying API calls. 51 shortAttempt = utils.AttemptStrategy{ 52 Total: 5 * time.Second, 53 Delay: 200 * time.Millisecond, 54 } 55 56 // aliveInstanceStates are the states which we filter by when listing 57 // instances in an environment. 58 aliveInstanceStates = []string{"pending", "running"} 59 ) 60 61 type environ struct { 62 common.SupportsUnitPlacementPolicy 63 64 name string 65 66 // archMutex gates access to supportedArchitectures 67 archMutex sync.Mutex 68 // supportedArchitectures caches the architectures 69 // for which images can be instantiated. 70 supportedArchitectures []string 71 72 // ecfgMutex protects the *Unlocked fields below. 73 ecfgMutex sync.Mutex 74 ecfgUnlocked *environConfig 75 ec2Unlocked *ec2.EC2 76 s3Unlocked *s3.S3 77 78 availabilityZonesMutex sync.Mutex 79 availabilityZones []common.AvailabilityZone 80 81 allocationMutex sync.Mutex 82 allocationSupported *bool 83 } 84 85 // AssignPrivateIPAddress is a wrapper around ec2Inst.AssignPrivateIPAddresses. 86 var AssignPrivateIPAddress = assignPrivateIPAddress 87 88 // assignPrivateIPAddress should not be called directly so tests can patch it (use 89 // AssignPrivateIPAddress). 90 func assignPrivateIPAddress(ec2Inst *ec2.EC2, netId string, addr network.Address) error { 91 _, err := ec2Inst.AssignPrivateIPAddresses(netId, []string{addr.Value}, 0, false) 92 return err 93 } 94 95 func (e *environ) Config() *config.Config { 96 return e.ecfg().Config 97 } 98 99 func awsClients(cfg *config.Config) (*ec2.EC2, *s3.S3, *environConfig, error) { 100 ecfg, err := providerInstance.newConfig(cfg) 101 if err != nil { 102 return nil, nil, nil, err 103 } 104 105 auth := aws.Auth{ 106 AccessKey: ecfg.accessKey(), 107 SecretKey: ecfg.secretKey(), 108 } 109 region := aws.Regions[ecfg.region()] 110 signer := aws.SignV4Factory(region.Name, "ec2") 111 return ec2.New(auth, region, signer), s3.New(auth, region), ecfg, nil 112 } 113 114 func (e *environ) SetConfig(cfg *config.Config) error { 115 ec2Client, s3Client, ecfg, err := awsClients(cfg) 116 if err != nil { 117 return err 118 } 119 120 e.ecfgMutex.Lock() 121 defer e.ecfgMutex.Unlock() 122 e.ecfgUnlocked = ecfg 123 e.ec2Unlocked = ec2Client 124 e.s3Unlocked = s3Client 125 126 return nil 127 } 128 129 func (e *environ) ecfg() *environConfig { 130 e.ecfgMutex.Lock() 131 ecfg := e.ecfgUnlocked 132 e.ecfgMutex.Unlock() 133 return ecfg 134 } 135 136 func (e *environ) ec2() *ec2.EC2 { 137 e.ecfgMutex.Lock() 138 ec2 := e.ec2Unlocked 139 e.ecfgMutex.Unlock() 140 return ec2 141 } 142 143 func (e *environ) Name() string { 144 return e.name 145 } 146 147 func (e *environ) Bootstrap(ctx environs.BootstrapContext, args environs.BootstrapParams) (*environs.BootstrapResult, error) { 148 return common.Bootstrap(ctx, e, args) 149 } 150 151 // SupportedArchitectures is specified on the EnvironCapability interface. 152 func (e *environ) SupportedArchitectures() ([]string, error) { 153 e.archMutex.Lock() 154 defer e.archMutex.Unlock() 155 if e.supportedArchitectures != nil { 156 return e.supportedArchitectures, nil 157 } 158 // Create a filter to get all images from our region and for the correct stream. 159 cloudSpec, err := e.Region() 160 if err != nil { 161 return nil, err 162 } 163 imageConstraint := imagemetadata.NewImageConstraint(simplestreams.LookupParams{ 164 CloudSpec: cloudSpec, 165 Stream: e.Config().ImageStream(), 166 }) 167 e.supportedArchitectures, err = common.SupportedArchitectures(e, imageConstraint) 168 return e.supportedArchitectures, err 169 } 170 171 // SupportsSpaces is specified on environs.Networking. 172 func (e *environ) SupportsSpaces() (bool, error) { 173 return true, nil 174 } 175 176 // SupportsSpaceDiscovery is specified on environs.Networking. 177 func (e *environ) SupportsSpaceDiscovery() (bool, error) { 178 return false, nil 179 } 180 181 // SupportsAddressAllocation is specified on environs.Networking. 182 func (e *environ) SupportsAddressAllocation(_ network.Id) (bool, error) { 183 e.allocationMutex.Lock() 184 defer e.allocationMutex.Unlock() 185 186 if e.allocationSupported == nil { 187 var notSupported bool 188 e.allocationSupported = ¬Supported 189 190 if environs.AddressAllocationEnabled(provider.EC2) { 191 defaultVPCID, err := findDefaultVPCID(e.ec2()) 192 if err == nil { 193 logger.Infof("legacy address allocation supported with default VPC %q", defaultVPCID) 194 *e.allocationSupported = true 195 } else if errors.IsNotFound(err) { 196 logger.Infof("legacy address allocation not supported without a default VPC") 197 } 198 } 199 } 200 201 if *e.allocationSupported { 202 return true, nil 203 } 204 return false, errors.NotSupportedf("address allocation") 205 } 206 207 var unsupportedConstraints = []string{ 208 constraints.Tags, 209 // TODO(anastasiamac 2016-03-16) LP#1557874 210 // use virt-type in StartInstances 211 constraints.VirtType, 212 } 213 214 // ConstraintsValidator is defined on the Environs interface. 215 func (e *environ) ConstraintsValidator() (constraints.Validator, error) { 216 validator := constraints.NewValidator() 217 validator.RegisterConflicts( 218 []string{constraints.InstanceType}, 219 []string{constraints.Mem, constraints.CpuCores, constraints.CpuPower}) 220 validator.RegisterUnsupported(unsupportedConstraints) 221 supportedArches, err := e.SupportedArchitectures() 222 if err != nil { 223 return nil, err 224 } 225 validator.RegisterVocabulary(constraints.Arch, supportedArches) 226 instTypeNames := make([]string, len(allInstanceTypes)) 227 for i, itype := range allInstanceTypes { 228 instTypeNames[i] = itype.Name 229 } 230 validator.RegisterVocabulary(constraints.InstanceType, instTypeNames) 231 return validator, nil 232 } 233 234 func archMatches(arches []string, arch *string) bool { 235 if arch == nil { 236 return true 237 } 238 for _, a := range arches { 239 if a == *arch { 240 return true 241 } 242 } 243 return false 244 } 245 246 var ec2AvailabilityZones = (*ec2.EC2).AvailabilityZones 247 248 type ec2AvailabilityZone struct { 249 ec2.AvailabilityZoneInfo 250 } 251 252 func (z *ec2AvailabilityZone) Name() string { 253 return z.AvailabilityZoneInfo.Name 254 } 255 256 func (z *ec2AvailabilityZone) Available() bool { 257 return z.AvailabilityZoneInfo.State == availableState 258 } 259 260 // AvailabilityZones returns a slice of availability zones 261 // for the configured region. 262 func (e *environ) AvailabilityZones() ([]common.AvailabilityZone, error) { 263 e.availabilityZonesMutex.Lock() 264 defer e.availabilityZonesMutex.Unlock() 265 if e.availabilityZones == nil { 266 filter := ec2.NewFilter() 267 filter.Add("region-name", e.ecfg().region()) 268 resp, err := ec2AvailabilityZones(e.ec2(), filter) 269 if err != nil { 270 return nil, err 271 } 272 logger.Debugf("availability zones: %+v", resp) 273 e.availabilityZones = make([]common.AvailabilityZone, len(resp.Zones)) 274 for i, z := range resp.Zones { 275 e.availabilityZones[i] = &ec2AvailabilityZone{z} 276 } 277 } 278 return e.availabilityZones, nil 279 } 280 281 // InstanceAvailabilityZoneNames returns the availability zone names for each 282 // of the specified instances. 283 func (e *environ) InstanceAvailabilityZoneNames(ids []instance.Id) ([]string, error) { 284 instances, err := e.Instances(ids) 285 if err != nil && err != environs.ErrPartialInstances { 286 return nil, err 287 } 288 zones := make([]string, len(instances)) 289 for i, inst := range instances { 290 if inst == nil { 291 continue 292 } 293 zones[i] = inst.(*ec2Instance).AvailZone 294 } 295 return zones, err 296 } 297 298 type ec2Placement struct { 299 availabilityZone ec2.AvailabilityZoneInfo 300 } 301 302 func (e *environ) parsePlacement(placement string) (*ec2Placement, error) { 303 pos := strings.IndexRune(placement, '=') 304 if pos == -1 { 305 return nil, fmt.Errorf("unknown placement directive: %v", placement) 306 } 307 switch key, value := placement[:pos], placement[pos+1:]; key { 308 case "zone": 309 availabilityZone := value 310 zones, err := e.AvailabilityZones() 311 if err != nil { 312 return nil, err 313 } 314 for _, z := range zones { 315 if z.Name() == availabilityZone { 316 return &ec2Placement{ 317 z.(*ec2AvailabilityZone).AvailabilityZoneInfo, 318 }, nil 319 } 320 } 321 return nil, fmt.Errorf("invalid availability zone %q", availabilityZone) 322 } 323 return nil, fmt.Errorf("unknown placement directive: %v", placement) 324 } 325 326 // PrecheckInstance is defined on the state.Prechecker interface. 327 func (e *environ) PrecheckInstance(series string, cons constraints.Value, placement string) error { 328 if placement != "" { 329 if _, err := e.parsePlacement(placement); err != nil { 330 return err 331 } 332 } 333 if !cons.HasInstanceType() { 334 return nil 335 } 336 // Constraint has an instance-type constraint so let's see if it is valid. 337 for _, itype := range allInstanceTypes { 338 if itype.Name != *cons.InstanceType { 339 continue 340 } 341 if archMatches(itype.Arches, cons.Arch) { 342 return nil 343 } 344 } 345 if cons.Arch == nil { 346 return fmt.Errorf("invalid AWS instance type %q specified", *cons.InstanceType) 347 } 348 return fmt.Errorf("invalid AWS instance type %q and arch %q specified", *cons.InstanceType, *cons.Arch) 349 } 350 351 // MetadataLookupParams returns parameters which are used to query simplestreams metadata. 352 func (e *environ) MetadataLookupParams(region string) (*simplestreams.MetadataLookupParams, error) { 353 if region == "" { 354 region = e.ecfg().region() 355 } 356 cloudSpec, err := e.cloudSpec(region) 357 if err != nil { 358 return nil, err 359 } 360 return &simplestreams.MetadataLookupParams{ 361 Series: config.PreferredSeries(e.ecfg()), 362 Region: cloudSpec.Region, 363 Endpoint: cloudSpec.Endpoint, 364 Architectures: arch.AllSupportedArches, 365 }, nil 366 } 367 368 // Region is specified in the HasRegion interface. 369 func (e *environ) Region() (simplestreams.CloudSpec, error) { 370 return e.cloudSpec(e.ecfg().region()) 371 } 372 373 func (e *environ) cloudSpec(region string) (simplestreams.CloudSpec, error) { 374 ec2Region, ok := allRegions[region] 375 if !ok { 376 return simplestreams.CloudSpec{}, fmt.Errorf("unknown region %q", region) 377 } 378 return simplestreams.CloudSpec{ 379 Region: region, 380 Endpoint: ec2Region.EC2Endpoint, 381 }, nil 382 } 383 384 const ( 385 ebsStorage = "ebs" 386 ssdStorage = "ssd" 387 ) 388 389 // DistributeInstances implements the state.InstanceDistributor policy. 390 func (e *environ) DistributeInstances(candidates, distributionGroup []instance.Id) ([]instance.Id, error) { 391 return common.DistributeInstances(e, candidates, distributionGroup) 392 } 393 394 var availabilityZoneAllocations = common.AvailabilityZoneAllocations 395 396 // MaintainInstance is specified in the InstanceBroker interface. 397 func (*environ) MaintainInstance(args environs.StartInstanceParams) error { 398 return nil 399 } 400 401 // resourceName returns the string to use for a resource's Name tag, 402 // to help users identify Juju-managed resources in the AWS console. 403 func resourceName(tag names.Tag, envName string) string { 404 return fmt.Sprintf("juju-%s-%s", envName, tag) 405 } 406 407 // StartInstance is specified in the InstanceBroker interface. 408 func (e *environ) StartInstance(args environs.StartInstanceParams) (_ *environs.StartInstanceResult, resultErr error) { 409 var inst *ec2Instance 410 defer func() { 411 if resultErr == nil || inst == nil { 412 return 413 } 414 if err := e.StopInstances(inst.Id()); err != nil { 415 logger.Errorf("error stopping failed instance: %v", err) 416 } 417 }() 418 419 var availabilityZones []string 420 if args.Placement != "" { 421 placement, err := e.parsePlacement(args.Placement) 422 if err != nil { 423 return nil, err 424 } 425 if placement.availabilityZone.State != availableState { 426 return nil, errors.Errorf("availability zone %q is %s", placement.availabilityZone.Name, placement.availabilityZone.State) 427 } 428 availabilityZones = append(availabilityZones, placement.availabilityZone.Name) 429 } 430 431 // If no availability zone is specified, then automatically spread across 432 // the known zones for optimal spread across the instance distribution 433 // group. 434 var zoneInstances []common.AvailabilityZoneInstances 435 if len(availabilityZones) == 0 { 436 var err error 437 var group []instance.Id 438 if args.DistributionGroup != nil { 439 group, err = args.DistributionGroup() 440 if err != nil { 441 return nil, err 442 } 443 } 444 zoneInstances, err = availabilityZoneAllocations(e, group) 445 if err != nil { 446 return nil, err 447 } 448 for _, z := range zoneInstances { 449 availabilityZones = append(availabilityZones, z.ZoneName) 450 } 451 if len(availabilityZones) == 0 { 452 return nil, errors.New("failed to determine availability zones") 453 } 454 } 455 456 arches := args.Tools.Arches() 457 458 spec, err := findInstanceSpec(args.ImageMetadata, &instances.InstanceConstraint{ 459 Region: e.ecfg().region(), 460 Series: args.InstanceConfig.Series, 461 Arches: arches, 462 Constraints: args.Constraints, 463 Storage: []string{ssdStorage, ebsStorage}, 464 }) 465 if err != nil { 466 return nil, err 467 } 468 tools, err := args.Tools.Match(tools.Filter{Arch: spec.Image.Arch}) 469 if err != nil { 470 return nil, errors.Errorf("chosen architecture %v not present in %v", spec.Image.Arch, arches) 471 } 472 473 if spec.InstanceType.Deprecated { 474 logger.Warningf("deprecated instance type specified: %s", spec.InstanceType.Name) 475 } 476 477 if err := args.InstanceConfig.SetTools(tools); err != nil { 478 return nil, errors.Trace(err) 479 } 480 if err := instancecfg.FinishInstanceConfig(args.InstanceConfig, e.Config()); err != nil { 481 return nil, err 482 } 483 484 userData, err := providerinit.ComposeUserData(args.InstanceConfig, nil, AmazonRenderer{}) 485 if err != nil { 486 return nil, errors.Annotate(err, "cannot make user data") 487 } 488 logger.Debugf("ec2 user data; %d bytes", len(userData)) 489 cfg := e.Config() 490 groups, err := e.setUpGroups(args.InstanceConfig.MachineId, cfg.APIPort()) 491 if err != nil { 492 return nil, errors.Annotate(err, "cannot set up groups") 493 } 494 495 blockDeviceMappings := getBlockDeviceMappings(args.Constraints, args.InstanceConfig.Series) 496 rootDiskSize := uint64(blockDeviceMappings[0].VolumeSize) * 1024 497 498 // If --constraints spaces=foo was passed, the provisioner will populate 499 // args.SubnetsToZones map. In AWS a subnet can span only one zone, so here 500 // we build the reverse map zonesToSubnets, which we will use to below in 501 // the RunInstance loop to provide an explicit subnet ID, rather than just 502 // AZ. This ensures instances in the same group (units of a service or all 503 // instances when adding a machine manually) will still be evenly 504 // distributed across AZs, but only within subnets of the space constraint. 505 // 506 // TODO(dimitern): This should be done in a provider-independant way. 507 if spaces := args.Constraints.IncludeSpaces(); len(spaces) > 1 { 508 logger.Infof("ignoring all but the first positive space from constraints: %v", spaces) 509 } 510 511 var instResp *ec2.RunInstancesResp 512 commonRunArgs := &ec2.RunInstances{ 513 MinCount: 1, 514 MaxCount: 1, 515 UserData: userData, 516 InstanceType: spec.InstanceType.Name, 517 SecurityGroups: groups, 518 BlockDeviceMappings: blockDeviceMappings, 519 ImageId: spec.Image.Id, 520 } 521 522 for _, zone := range availabilityZones { 523 runArgs := commonRunArgs 524 runArgs.AvailZone = zone 525 526 var subnetIDsForZone []string 527 var subnetErr error 528 if e.ecfg().vpcID() != "" && !args.Constraints.HaveSpaces() { 529 subnetIDsForZone, subnetErr = getVPCSubnetIDsForAvailabilityZone(e.ec2(), e.ecfg().vpcID(), zone) 530 } else if e.ecfg().vpcID() == "" && args.Constraints.HaveSpaces() { 531 subnetIDsForZone, subnetErr = findSubnetIDsForAvailabilityZone(zone, args.SubnetsToZones) 532 } 533 534 switch { 535 case subnetErr != nil && errors.IsNotFound(subnetErr): 536 logger.Infof("no matching subnets in zone %q; assuming zone is constrained and trying another", zone) 537 continue 538 case subnetErr != nil: 539 return nil, errors.Annotatef(subnetErr, "getting subnets for zone %q", zone) 540 case len(subnetIDsForZone) > 1: 541 // With multiple equally suitable subnets, picking one at random 542 // will allow for better instance spread within the same zone, and 543 // still work correctly if we happen to pick a constrained subnet 544 // (we'll just treat this the same way we treat constrained zones 545 // and retry). 546 runArgs.SubnetId = subnetIDsForZone[rand.Intn(len(subnetIDsForZone))] 547 logger.Infof( 548 "selected random subnet %q from all matching in zone %q: %v", 549 runArgs.SubnetId, zone, subnetIDsForZone, 550 ) 551 case len(subnetIDsForZone) == 1: 552 runArgs.SubnetId = subnetIDsForZone[0] 553 logger.Infof("selected subnet %q in zone %q", runArgs.SubnetId, zone) 554 } 555 556 instResp, err = runInstances(e.ec2(), runArgs) 557 if err == nil || !isZoneOrSubnetConstrainedError(err) { 558 break 559 } 560 561 logger.Infof("%q is constrained, trying another availability zone", zone) 562 } 563 564 if err != nil { 565 return nil, errors.Annotate(err, "cannot run instances") 566 } 567 if len(instResp.Instances) != 1 { 568 return nil, errors.Errorf("expected 1 started instance, got %d", len(instResp.Instances)) 569 } 570 571 inst = &ec2Instance{ 572 e: e, 573 Instance: &instResp.Instances[0], 574 } 575 instAZ, instSubnet := inst.Instance.AvailZone, inst.Instance.SubnetId 576 chosenVPCID := e.ecfg().vpcID() 577 logger.Infof("started instance %q in AZ %q, subnet %q, VPC %q", inst.Id(), instAZ, instSubnet, chosenVPCID) 578 579 // Tag instance, for accounting and identification. 580 instanceName := resourceName( 581 names.NewMachineTag(args.InstanceConfig.MachineId), e.Config().Name(), 582 ) 583 args.InstanceConfig.Tags[tagName] = instanceName 584 if err := tagResources(e.ec2(), args.InstanceConfig.Tags, string(inst.Id())); err != nil { 585 return nil, errors.Annotate(err, "tagging instance") 586 } 587 588 // Tag the machine's root EBS volume, if it has one. 589 if inst.Instance.RootDeviceType == "ebs" { 590 tags := tags.ResourceTags( 591 names.NewModelTag(cfg.UUID()), 592 names.NewModelTag(cfg.ControllerUUID()), 593 cfg, 594 ) 595 tags[tagName] = instanceName + "-root" 596 if err := tagRootDisk(e.ec2(), tags, inst.Instance); err != nil { 597 return nil, errors.Annotate(err, "tagging root disk") 598 } 599 } 600 601 hc := instance.HardwareCharacteristics{ 602 Arch: &spec.Image.Arch, 603 Mem: &spec.InstanceType.Mem, 604 CpuCores: &spec.InstanceType.CpuCores, 605 CpuPower: spec.InstanceType.CpuPower, 606 RootDisk: &rootDiskSize, 607 // Tags currently not supported by EC2 608 AvailabilityZone: &inst.Instance.AvailZone, 609 } 610 return &environs.StartInstanceResult{ 611 Instance: inst, 612 Hardware: &hc, 613 }, nil 614 } 615 616 // tagResources calls ec2.CreateTags, tagging each of the specified resources 617 // with the given tags. tagResources will retry for a short period of time 618 // if it receives a *.NotFound error response from EC2. 619 func tagResources(e *ec2.EC2, tags map[string]string, resourceIds ...string) error { 620 if len(tags) == 0 { 621 return nil 622 } 623 ec2Tags := make([]ec2.Tag, 0, len(tags)) 624 for k, v := range tags { 625 ec2Tags = append(ec2Tags, ec2.Tag{k, v}) 626 } 627 var err error 628 for a := shortAttempt.Start(); a.Next(); { 629 _, err = e.CreateTags(resourceIds, ec2Tags) 630 if err == nil || !strings.HasSuffix(ec2ErrCode(err), ".NotFound") { 631 return err 632 } 633 } 634 return err 635 } 636 637 func tagRootDisk(e *ec2.EC2, tags map[string]string, inst *ec2.Instance) error { 638 if len(tags) == 0 { 639 return nil 640 } 641 findVolumeId := func(inst *ec2.Instance) string { 642 for _, m := range inst.BlockDeviceMappings { 643 if m.DeviceName != inst.RootDeviceName { 644 continue 645 } 646 return m.VolumeId 647 } 648 return "" 649 } 650 // Wait until the instance has an associated EBS volume in the 651 // block-device-mapping. 652 volumeId := findVolumeId(inst) 653 waitRootDiskAttempt := utils.AttemptStrategy{ 654 Total: 5 * time.Minute, 655 Delay: 5 * time.Second, 656 } 657 for a := waitRootDiskAttempt.Start(); volumeId == "" && a.Next(); { 658 resp, err := e.Instances([]string{inst.InstanceId}, nil) 659 if err != nil { 660 return err 661 } 662 if len(resp.Reservations) > 0 && len(resp.Reservations[0].Instances) > 0 { 663 inst = &resp.Reservations[0].Instances[0] 664 volumeId = findVolumeId(inst) 665 } 666 } 667 if volumeId == "" { 668 return errors.New("timed out waiting for EBS volume to be associated") 669 } 670 return tagResources(e, tags, volumeId) 671 } 672 673 var runInstances = _runInstances 674 675 // runInstances calls ec2.RunInstances for a fixed number of attempts until 676 // RunInstances returns an error code that does not indicate an error that 677 // may be caused by eventual consistency. 678 func _runInstances(e *ec2.EC2, ri *ec2.RunInstances) (resp *ec2.RunInstancesResp, err error) { 679 for a := shortAttempt.Start(); a.Next(); { 680 resp, err = e.RunInstances(ri) 681 if err == nil || !isNotFoundError(err) { 682 break 683 } 684 } 685 return resp, err 686 } 687 688 func (e *environ) StopInstances(ids ...instance.Id) error { 689 return errors.Trace(e.terminateInstances(ids)) 690 } 691 692 // groupInfoByName returns information on the security group 693 // with the given name including rules and other details. 694 func (e *environ) groupInfoByName(groupName string) (ec2.SecurityGroupInfo, error) { 695 resp, err := e.securityGroupsByNameOrID(groupName) 696 if err != nil { 697 return ec2.SecurityGroupInfo{}, err 698 } 699 700 if len(resp.Groups) != 1 { 701 return ec2.SecurityGroupInfo{}, errors.NewNotFound(fmt.Errorf( 702 "expected one security group named %q, got %v", 703 groupName, resp.Groups, 704 ), "") 705 } 706 return resp.Groups[0], nil 707 } 708 709 // groupByName returns the security group with the given name. 710 func (e *environ) groupByName(groupName string) (ec2.SecurityGroup, error) { 711 groupInfo, err := e.groupInfoByName(groupName) 712 return groupInfo.SecurityGroup, err 713 } 714 715 // isNotFoundError returns whether err is a typed NotFoundError or an EC2 error 716 // code for "group not found", indicating no matching instances (as they are 717 // filtered by group). 718 func isNotFoundError(err error) bool { 719 return err != nil && (errors.IsNotFound(err) || ec2ErrCode(err) == "InvalidGroup.NotFound") 720 } 721 722 // Instances is part of the environs.Environ interface. 723 func (e *environ) Instances(ids []instance.Id) ([]instance.Instance, error) { 724 if len(ids) == 0 { 725 return nil, nil 726 } 727 insts := make([]instance.Instance, len(ids)) 728 // Make a series of requests to cope with eventual consistency. 729 // Each request will attempt to add more instances to the requested 730 // set. 731 var err error 732 for a := shortAttempt.Start(); a.Next(); { 733 var need []string 734 for i, inst := range insts { 735 if inst == nil { 736 need = append(need, string(ids[i])) 737 } 738 } 739 filter := ec2.NewFilter() 740 filter.Add("instance-state-name", aliveInstanceStates...) 741 filter.Add("instance-id", need...) 742 e.addModelFilter(filter) 743 err = e.gatherInstances(ids, insts, filter) 744 if err == nil || err != environs.ErrPartialInstances { 745 break 746 } 747 } 748 if err == environs.ErrPartialInstances { 749 for _, inst := range insts { 750 if inst != nil { 751 return insts, environs.ErrPartialInstances 752 } 753 } 754 return nil, environs.ErrNoInstances 755 } 756 if err != nil { 757 return nil, err 758 } 759 return insts, nil 760 } 761 762 // gatherInstances tries to get information on each instance 763 // id whose corresponding insts slot is nil. 764 // 765 // This function returns environs.ErrPartialInstances if the 766 // insts slice has not been completely filled. 767 func (e *environ) gatherInstances( 768 ids []instance.Id, 769 insts []instance.Instance, 770 filter *ec2.Filter, 771 ) error { 772 resp, err := e.ec2().Instances(nil, filter) 773 if err != nil { 774 return err 775 } 776 n := 0 777 // For each requested id, add it to the returned instances 778 // if we find it in the response. 779 for i, id := range ids { 780 if insts[i] != nil { 781 n++ 782 continue 783 } 784 for j := range resp.Reservations { 785 r := &resp.Reservations[j] 786 for k := range r.Instances { 787 if r.Instances[k].InstanceId != string(id) { 788 continue 789 } 790 inst := r.Instances[k] 791 // TODO(wallyworld): lookup the details to fill in the instance type data 792 insts[i] = &ec2Instance{e: e, Instance: &inst} 793 n++ 794 } 795 } 796 } 797 if n < len(ids) { 798 return environs.ErrPartialInstances 799 } 800 return nil 801 } 802 803 func (e *environ) fetchNetworkInterfaceId(ec2Inst *ec2.EC2, instId instance.Id) (string, error) { 804 var err error 805 var instancesResp *ec2.InstancesResp 806 for a := shortAttempt.Start(); a.Next(); { 807 instancesResp, err = ec2Inst.Instances([]string{string(instId)}, nil) 808 if err == nil { 809 break 810 } 811 logger.Tracef("Instances(%q) returned: %v", instId, err) 812 } 813 if err != nil { 814 // either the instance doesn't exist or we couldn't get through to 815 // the ec2 api 816 return "", err 817 } 818 819 if len(instancesResp.Reservations) == 0 { 820 return "", errors.New("unexpected AWS response: reservation not found") 821 } 822 if len(instancesResp.Reservations[0].Instances) == 0 { 823 return "", errors.New("unexpected AWS response: instance not found") 824 } 825 if len(instancesResp.Reservations[0].Instances[0].NetworkInterfaces) == 0 { 826 return "", errors.New("unexpected AWS response: network interface not found") 827 } 828 networkInterfaceId := instancesResp.Reservations[0].Instances[0].NetworkInterfaces[0].Id 829 return networkInterfaceId, nil 830 } 831 832 // AllocateAddress requests an address to be allocated for the given 833 // instance on the given subnet. Implements NetworkingEnviron.AllocateAddress. 834 func (e *environ) AllocateAddress(instId instance.Id, _ network.Id, addr *network.Address, _, _ string) (err error) { 835 if !environs.AddressAllocationEnabled(provider.EC2) { 836 return errors.NotSupportedf("address allocation") 837 } 838 if addr == nil || addr.Value == "" { 839 return errors.NewNotValid(nil, "invalid address: nil or empty") 840 } 841 842 defer errors.DeferredAnnotatef(&err, "failed to allocate address %q for instance %q", addr, instId) 843 844 var nicId string 845 ec2Inst := e.ec2() 846 nicId, err = e.fetchNetworkInterfaceId(ec2Inst, instId) 847 if err != nil { 848 return errors.Trace(err) 849 } 850 for a := shortAttempt.Start(); a.Next(); { 851 err = AssignPrivateIPAddress(ec2Inst, nicId, *addr) 852 logger.Tracef("AssignPrivateIPAddresses(%v, %v) returned: %v", nicId, *addr, err) 853 if err == nil { 854 logger.Tracef("allocated address %v for instance %v, NIC %v", *addr, instId, nicId) 855 break 856 } 857 if ec2Err, ok := err.(*ec2.Error); ok { 858 if ec2Err.Code == invalidParameterValue { 859 // Note: this Code is also used if we specify 860 // an IP address outside the subnet. Take care! 861 logger.Tracef("address %q not available for allocation", *addr) 862 return environs.ErrIPAddressUnavailable 863 } else if ec2Err.Code == privateAddressLimitExceeded { 864 logger.Tracef("no more addresses available on the subnet") 865 return environs.ErrIPAddressesExhausted 866 } 867 } 868 869 } 870 return err 871 } 872 873 // ReleaseAddress releases a specific address previously allocated with 874 // AllocateAddress. Implements NetworkingEnviron.ReleaseAddress. 875 func (e *environ) ReleaseAddress(instId instance.Id, _ network.Id, addr network.Address, _, _ string) (err error) { 876 if !environs.AddressAllocationEnabled(provider.EC2) { 877 return errors.NotSupportedf("address allocation") 878 } 879 880 defer errors.DeferredAnnotatef(&err, "failed to release address %q from instance %q", addr, instId) 881 882 // If the instance ID is unknown the address has already been released 883 // and we can ignore this request. 884 if instId == instance.UnknownId { 885 logger.Debugf("release address %q with an unknown instance ID is a no-op (ignoring)", addr.Value) 886 return nil 887 } 888 889 var nicId string 890 ec2Inst := e.ec2() 891 nicId, err = e.fetchNetworkInterfaceId(ec2Inst, instId) 892 if err != nil { 893 return errors.Trace(err) 894 } 895 for a := shortAttempt.Start(); a.Next(); { 896 _, err = ec2Inst.UnassignPrivateIPAddresses(nicId, []string{addr.Value}) 897 logger.Tracef("UnassignPrivateIPAddresses(%q, %q) returned: %v", nicId, addr, err) 898 if err == nil { 899 logger.Tracef("released address %q from instance %q, NIC %q", addr, instId, nicId) 900 break 901 } 902 } 903 return err 904 } 905 906 // NetworkInterfaces implements NetworkingEnviron.NetworkInterfaces. 907 func (e *environ) NetworkInterfaces(instId instance.Id) ([]network.InterfaceInfo, error) { 908 ec2Client := e.ec2() 909 var err error 910 var networkInterfacesResp *ec2.NetworkInterfacesResp 911 for a := shortAttempt.Start(); a.Next(); { 912 logger.Tracef("retrieving NICs for instance %q", instId) 913 filter := ec2.NewFilter() 914 filter.Add("attachment.instance-id", string(instId)) 915 networkInterfacesResp, err = ec2Client.NetworkInterfaces(nil, filter) 916 logger.Tracef("instance %q NICs: %#v (err: %v)", instId, networkInterfacesResp, err) 917 if err != nil { 918 logger.Warningf("failed to get instance %q interfaces: %v (retrying)", instId, err) 919 continue 920 } 921 if len(networkInterfacesResp.Interfaces) == 0 { 922 logger.Tracef("instance %q has no NIC attachment yet, retrying...", instId) 923 continue 924 } 925 logger.Tracef("found instance %q NICS: %#v", instId, networkInterfacesResp.Interfaces) 926 break 927 } 928 if err != nil { 929 // either the instance doesn't exist or we couldn't get through to 930 // the ec2 api 931 return nil, errors.Annotatef(err, "cannot get instance %q network interfaces", instId) 932 } 933 ec2Interfaces := networkInterfacesResp.Interfaces 934 result := make([]network.InterfaceInfo, len(ec2Interfaces)) 935 for i, iface := range ec2Interfaces { 936 resp, err := ec2Client.Subnets([]string{iface.SubnetId}, nil) 937 if err != nil { 938 return nil, errors.Annotatef(err, "failed to retrieve subnet %q info", iface.SubnetId) 939 } 940 if len(resp.Subnets) != 1 { 941 return nil, errors.Errorf("expected 1 subnet, got %d", len(resp.Subnets)) 942 } 943 subnet := resp.Subnets[0] 944 cidr := subnet.CIDRBlock 945 946 result[i] = network.InterfaceInfo{ 947 DeviceIndex: iface.Attachment.DeviceIndex, 948 MACAddress: iface.MACAddress, 949 CIDR: cidr, 950 ProviderId: network.Id(iface.Id), 951 ProviderSubnetId: network.Id(iface.SubnetId), 952 AvailabilityZones: []string{subnet.AvailZone}, 953 VLANTag: 0, // Not supported on EC2. 954 // Getting the interface name is not supported on EC2, so fake it. 955 InterfaceName: fmt.Sprintf("unsupported%d", iface.Attachment.DeviceIndex), 956 Disabled: false, 957 NoAutoStart: false, 958 ConfigType: network.ConfigDHCP, 959 InterfaceType: network.EthernetInterface, 960 Address: network.NewScopedAddress(iface.PrivateIPAddress, network.ScopeCloudLocal), 961 } 962 } 963 return result, nil 964 } 965 966 func makeSubnetInfo(cidr string, subnetId network.Id, availZones []string) (network.SubnetInfo, error) { 967 ip, ipnet, err := net.ParseCIDR(cidr) 968 if err != nil { 969 logger.Warningf("skipping subnet %q, invalid CIDR: %v", cidr, err) 970 return network.SubnetInfo{}, err 971 } 972 // ec2 only uses IPv4 addresses for subnets 973 start, err := network.IPv4ToDecimal(ip) 974 if err != nil { 975 logger.Warningf("skipping subnet %q, invalid IP: %v", cidr, err) 976 return network.SubnetInfo{}, err 977 } 978 // First four addresses in a subnet are reserved, see 979 // http://goo.gl/rrWTIo 980 allocatableLow := network.DecimalToIPv4(start + 4) 981 982 ones, bits := ipnet.Mask.Size() 983 zeros := bits - ones 984 numIPs := uint32(1) << uint32(zeros) 985 highIP := start + numIPs - 1 986 // The last address in a subnet is also reserved (see same ref). 987 allocatableHigh := network.DecimalToIPv4(highIP - 1) 988 989 info := network.SubnetInfo{ 990 CIDR: cidr, 991 ProviderId: subnetId, 992 VLANTag: 0, // Not supported on EC2 993 AllocatableIPLow: allocatableLow, 994 AllocatableIPHigh: allocatableHigh, 995 AvailabilityZones: availZones, 996 } 997 logger.Tracef("found subnet with info %#v", info) 998 return info, nil 999 1000 } 1001 1002 // Spaces is not implemented by the ec2 provider as we don't currently have 1003 // provider level spaces. 1004 func (e *environ) Spaces() ([]network.SpaceInfo, error) { 1005 return nil, errors.NotSupportedf("Spaces") 1006 } 1007 1008 // Subnets returns basic information about the specified subnets known 1009 // by the provider for the specified instance or list of ids. subnetIds can be 1010 // empty, in which case all known are returned. Implements 1011 // NetworkingEnviron.Subnets. 1012 func (e *environ) Subnets(instId instance.Id, subnetIds []network.Id) ([]network.SubnetInfo, error) { 1013 var results []network.SubnetInfo 1014 subIdSet := make(map[string]bool) 1015 for _, subId := range subnetIds { 1016 subIdSet[string(subId)] = false 1017 } 1018 1019 if instId != instance.UnknownId { 1020 interfaces, err := e.NetworkInterfaces(instId) 1021 if err != nil { 1022 return results, errors.Trace(err) 1023 } 1024 if len(subnetIds) == 0 { 1025 for _, iface := range interfaces { 1026 subIdSet[string(iface.ProviderSubnetId)] = false 1027 } 1028 } 1029 for _, iface := range interfaces { 1030 _, ok := subIdSet[string(iface.ProviderSubnetId)] 1031 if !ok { 1032 logger.Tracef("subnet %q not in %v, skipping", iface.ProviderSubnetId, subnetIds) 1033 continue 1034 } 1035 subIdSet[string(iface.ProviderSubnetId)] = true 1036 info, err := makeSubnetInfo(iface.CIDR, iface.ProviderSubnetId, iface.AvailabilityZones) 1037 if err != nil { 1038 // Error will already have been logged. 1039 continue 1040 } 1041 results = append(results, info) 1042 } 1043 } else { 1044 ec2Inst := e.ec2() 1045 resp, err := ec2Inst.Subnets(nil, nil) 1046 if err != nil { 1047 return nil, errors.Annotatef(err, "failed to retrieve subnets") 1048 } 1049 if len(subnetIds) == 0 { 1050 for _, subnet := range resp.Subnets { 1051 subIdSet[subnet.Id] = false 1052 } 1053 } 1054 1055 for _, subnet := range resp.Subnets { 1056 _, ok := subIdSet[subnet.Id] 1057 if !ok { 1058 logger.Tracef("subnet %q not in %v, skipping", subnet.Id, subnetIds) 1059 continue 1060 } 1061 subIdSet[subnet.Id] = true 1062 cidr := subnet.CIDRBlock 1063 info, err := makeSubnetInfo(cidr, network.Id(subnet.Id), []string{subnet.AvailZone}) 1064 if err != nil { 1065 // Error will already have been logged. 1066 continue 1067 } 1068 results = append(results, info) 1069 1070 } 1071 } 1072 1073 notFound := []string{} 1074 for subId, found := range subIdSet { 1075 if !found { 1076 notFound = append(notFound, subId) 1077 } 1078 } 1079 if len(notFound) != 0 { 1080 return nil, errors.Errorf("failed to find the following subnet ids: %v", notFound) 1081 } 1082 1083 return results, nil 1084 } 1085 1086 func getTagByKey(key string, ec2Tags []ec2.Tag) (string, bool) { 1087 for _, tag := range ec2Tags { 1088 if tag.Key == key { 1089 return tag.Value, true 1090 } 1091 } 1092 return "", false 1093 } 1094 1095 // AllInstances is part of the environs.InstanceBroker interface. 1096 func (e *environ) AllInstances() ([]instance.Instance, error) { 1097 return e.AllInstancesByState("pending", "running") 1098 } 1099 1100 // AllInstancesByState returns all instances in the environment 1101 // with one of the specified instance states. 1102 func (e *environ) AllInstancesByState(states ...string) ([]instance.Instance, error) { 1103 // NOTE(axw) we use security group filtering here because instances 1104 // start out untagged. If Juju were to abort after starting an instance, 1105 // but before tagging it, it would be leaked. We only need to do this 1106 // for AllInstances, as it is the result of AllInstances that is used 1107 // in "harvesting" unknown instances by the provisioner. 1108 // 1109 // One possible alternative is to modify ec2.RunInstances to allow the 1110 // caller to specify ClientToken, and then format it like 1111 // <controller-uuid>:<model-uuid>:<machine-id> 1112 // (with base64-encoding to keep the size under the 64-byte limit) 1113 // 1114 // It is possible to filter on "client-token", and specify wildcards; 1115 // therefore we could use client-token filters everywhere in the ec2 1116 // provider instead of tags or security groups. The only danger is if 1117 // we need to make non-idempotent calls to RunInstances for the machine 1118 // ID. I don't think this is needed, but I am not confident enough to 1119 // change this fundamental right now. 1120 // 1121 // An EC2 API call is required to resolve the group name to an id, as 1122 // VPC enabled accounts do not support name based filtering. 1123 groupName := e.jujuGroupName() 1124 group, err := e.groupByName(groupName) 1125 if isNotFoundError(err) { 1126 // If there's no group, then there cannot be any instances. 1127 return nil, nil 1128 } else if err != nil { 1129 return nil, errors.Trace(err) 1130 } 1131 filter := ec2.NewFilter() 1132 filter.Add("instance-state-name", states...) 1133 filter.Add("instance.group-id", group.Id) 1134 return e.allInstances(filter) 1135 } 1136 1137 // ControllerInstances is part of the environs.Environ interface. 1138 func (e *environ) ControllerInstances() ([]instance.Id, error) { 1139 filter := ec2.NewFilter() 1140 filter.Add("instance-state-name", aliveInstanceStates...) 1141 filter.Add(fmt.Sprintf("tag:%s", tags.JujuIsController), "true") 1142 e.addModelFilter(filter) 1143 ids, err := e.allInstanceIDs(filter) 1144 if err != nil { 1145 return nil, errors.Trace(err) 1146 } 1147 if len(ids) == 0 { 1148 return nil, environs.ErrNotBootstrapped 1149 } 1150 return ids, nil 1151 } 1152 1153 // allControllerManagedInstances returns the IDs of all instances managed by 1154 // this environment's controller. 1155 // 1156 // Note that this requires that all instances are tagged; we cannot filter on 1157 // security groups, as we do not know the names of the models. 1158 func (e *environ) allControllerManagedInstances() ([]instance.Id, error) { 1159 filter := ec2.NewFilter() 1160 filter.Add("instance-state-name", aliveInstanceStates...) 1161 e.addControllerFilter(filter) 1162 return e.allInstanceIDs(filter) 1163 } 1164 1165 func (e *environ) allInstanceIDs(filter *ec2.Filter) ([]instance.Id, error) { 1166 insts, err := e.allInstances(filter) 1167 if err != nil { 1168 return nil, errors.Trace(err) 1169 } 1170 ids := make([]instance.Id, len(insts)) 1171 for i, inst := range insts { 1172 ids[i] = inst.Id() 1173 } 1174 return ids, nil 1175 } 1176 1177 func (e *environ) allInstances(filter *ec2.Filter) ([]instance.Instance, error) { 1178 resp, err := e.ec2().Instances(nil, filter) 1179 if err != nil { 1180 return nil, errors.Annotate(err, "listing instances") 1181 } 1182 var insts []instance.Instance 1183 for _, r := range resp.Reservations { 1184 for i := range r.Instances { 1185 inst := r.Instances[i] 1186 // TODO(wallyworld): lookup the details to fill in the instance type data 1187 insts = append(insts, &ec2Instance{e: e, Instance: &inst}) 1188 } 1189 } 1190 return insts, nil 1191 } 1192 1193 // Destroy is part of the environs.Environ interface. 1194 func (e *environ) Destroy() error { 1195 cfg := e.Config() 1196 if cfg.UUID() == cfg.ControllerUUID() { 1197 // In case any hosted environment hasn't been cleaned up yet, 1198 // we also attempt to delete their resources when the controller 1199 // environment is destroyed. 1200 if err := e.destroyControllerManagedEnvirons(); err != nil { 1201 return errors.Annotate(err, "destroying managed environs") 1202 } 1203 } 1204 if err := common.Destroy(e); err != nil { 1205 return errors.Trace(err) 1206 } 1207 if err := e.cleanEnvironmentSecurityGroups(); err != nil { 1208 return errors.Annotate(err, "cannot delete environment security groups") 1209 } 1210 return nil 1211 } 1212 1213 // destroyControllerManagedEnvirons destroys all environments managed by this 1214 // environment's controller. 1215 func (e *environ) destroyControllerManagedEnvirons() error { 1216 1217 // Terminate all instances managed by the controller. 1218 instIds, err := e.allControllerManagedInstances() 1219 if err != nil { 1220 return errors.Annotate(err, "listing instances") 1221 } 1222 if err := e.terminateInstances(instIds); err != nil { 1223 return errors.Annotate(err, "terminating instances") 1224 } 1225 1226 // Delete all volumes managed by the controller. 1227 volIds, err := e.allControllerManagedVolumes() 1228 if err != nil { 1229 return errors.Annotate(err, "listing volumes") 1230 } 1231 errs := destroyVolumes(e.ec2(), volIds) 1232 for i, err := range errs { 1233 if err == nil { 1234 continue 1235 } 1236 return errors.Annotatef(err, "destroying volume %q", volIds[i], err) 1237 } 1238 1239 // Delete security groups managed by the controller. 1240 groups, err := e.controllerSecurityGroups() 1241 if err != nil { 1242 return errors.Trace(err) 1243 } 1244 for _, g := range groups { 1245 if err := deleteSecurityGroupInsistently(e.ec2(), g, clock.WallClock); err != nil { 1246 return errors.Annotatef( 1247 err, "cannot delete security group %q (%q)", 1248 g.Name, g.Id, 1249 ) 1250 } 1251 } 1252 return nil 1253 } 1254 1255 func (e *environ) allControllerManagedVolumes() ([]string, error) { 1256 filter := ec2.NewFilter() 1257 e.addControllerFilter(filter) 1258 return listVolumes(e.ec2(), filter) 1259 } 1260 1261 func portsToIPPerms(ports []network.PortRange) []ec2.IPPerm { 1262 ipPerms := make([]ec2.IPPerm, len(ports)) 1263 for i, p := range ports { 1264 ipPerms[i] = ec2.IPPerm{ 1265 Protocol: p.Protocol, 1266 FromPort: p.FromPort, 1267 ToPort: p.ToPort, 1268 SourceIPs: []string{"0.0.0.0/0"}, 1269 } 1270 } 1271 return ipPerms 1272 } 1273 1274 func (e *environ) openPortsInGroup(name string, ports []network.PortRange) error { 1275 if len(ports) == 0 { 1276 return nil 1277 } 1278 // Give permissions for anyone to access the given ports. 1279 g, err := e.groupByName(name) 1280 if err != nil { 1281 return err 1282 } 1283 ipPerms := portsToIPPerms(ports) 1284 _, err = e.ec2().AuthorizeSecurityGroup(g, ipPerms) 1285 if err != nil && ec2ErrCode(err) == "InvalidPermission.Duplicate" { 1286 if len(ports) == 1 { 1287 return nil 1288 } 1289 // If there's more than one port and we get a duplicate error, 1290 // then we go through authorizing each port individually, 1291 // otherwise the ports that were *not* duplicates will have 1292 // been ignored 1293 for i := range ipPerms { 1294 _, err := e.ec2().AuthorizeSecurityGroup(g, ipPerms[i:i+1]) 1295 if err != nil && ec2ErrCode(err) != "InvalidPermission.Duplicate" { 1296 return fmt.Errorf("cannot open port %v: %v", ipPerms[i], err) 1297 } 1298 } 1299 return nil 1300 } 1301 if err != nil { 1302 return fmt.Errorf("cannot open ports: %v", err) 1303 } 1304 return nil 1305 } 1306 1307 func (e *environ) closePortsInGroup(name string, ports []network.PortRange) error { 1308 if len(ports) == 0 { 1309 return nil 1310 } 1311 // Revoke permissions for anyone to access the given ports. 1312 // Note that ec2 allows the revocation of permissions that aren't 1313 // granted, so this is naturally idempotent. 1314 g, err := e.groupByName(name) 1315 if err != nil { 1316 return err 1317 } 1318 _, err = e.ec2().RevokeSecurityGroup(g, portsToIPPerms(ports)) 1319 if err != nil { 1320 return fmt.Errorf("cannot close ports: %v", err) 1321 } 1322 return nil 1323 } 1324 1325 func (e *environ) portsInGroup(name string) (ports []network.PortRange, err error) { 1326 group, err := e.groupInfoByName(name) 1327 if err != nil { 1328 return nil, err 1329 } 1330 for _, p := range group.IPPerms { 1331 if len(p.SourceIPs) != 1 { 1332 logger.Warningf("unexpected IP permission found: %v", p) 1333 continue 1334 } 1335 ports = append(ports, network.PortRange{ 1336 Protocol: p.Protocol, 1337 FromPort: p.FromPort, 1338 ToPort: p.ToPort, 1339 }) 1340 } 1341 network.SortPortRanges(ports) 1342 return ports, nil 1343 } 1344 1345 func (e *environ) OpenPorts(ports []network.PortRange) error { 1346 if e.Config().FirewallMode() != config.FwGlobal { 1347 return fmt.Errorf("invalid firewall mode %q for opening ports on model", 1348 e.Config().FirewallMode()) 1349 } 1350 if err := e.openPortsInGroup(e.globalGroupName(), ports); err != nil { 1351 return err 1352 } 1353 logger.Infof("opened ports in global group: %v", ports) 1354 return nil 1355 } 1356 1357 func (e *environ) ClosePorts(ports []network.PortRange) error { 1358 if e.Config().FirewallMode() != config.FwGlobal { 1359 return fmt.Errorf("invalid firewall mode %q for closing ports on model", 1360 e.Config().FirewallMode()) 1361 } 1362 if err := e.closePortsInGroup(e.globalGroupName(), ports); err != nil { 1363 return err 1364 } 1365 logger.Infof("closed ports in global group: %v", ports) 1366 return nil 1367 } 1368 1369 func (e *environ) Ports() ([]network.PortRange, error) { 1370 if e.Config().FirewallMode() != config.FwGlobal { 1371 return nil, fmt.Errorf("invalid firewall mode %q for retrieving ports from model", 1372 e.Config().FirewallMode()) 1373 } 1374 return e.portsInGroup(e.globalGroupName()) 1375 } 1376 1377 func (*environ) Provider() environs.EnvironProvider { 1378 return &providerInstance 1379 } 1380 1381 func (e *environ) instanceSecurityGroups(instIDs []instance.Id, states ...string) ([]ec2.SecurityGroup, error) { 1382 ec2inst := e.ec2() 1383 strInstID := make([]string, len(instIDs)) 1384 for i := range instIDs { 1385 strInstID[i] = string(instIDs[i]) 1386 } 1387 1388 filter := ec2.NewFilter() 1389 if len(states) > 0 { 1390 filter.Add("instance-state-name", states...) 1391 } 1392 1393 resp, err := ec2inst.Instances(strInstID, filter) 1394 if err != nil { 1395 return nil, errors.Annotatef(err, "cannot retrieve instance information from aws to delete security groups") 1396 } 1397 1398 securityGroups := []ec2.SecurityGroup{} 1399 for _, res := range resp.Reservations { 1400 for _, inst := range res.Instances { 1401 logger.Debugf("instance %q has security groups %+v", inst.InstanceId, inst.SecurityGroups) 1402 securityGroups = append(securityGroups, inst.SecurityGroups...) 1403 } 1404 } 1405 return securityGroups, nil 1406 } 1407 1408 // controllerSecurityGroups returns the details of all security groups managed 1409 // by the environment's controller. 1410 func (e *environ) controllerSecurityGroups() ([]ec2.SecurityGroup, error) { 1411 filter := ec2.NewFilter() 1412 e.addControllerFilter(filter) 1413 resp, err := e.ec2().SecurityGroups(nil, filter) 1414 if err != nil { 1415 return nil, errors.Annotate(err, "listing security groups") 1416 } 1417 groups := make([]ec2.SecurityGroup, len(resp.Groups)) 1418 for i, info := range resp.Groups { 1419 groups[i] = ec2.SecurityGroup{Id: info.Id, Name: info.Name} 1420 } 1421 return groups, nil 1422 } 1423 1424 // cleanEnvironmentSecurityGroups attempts to delete all security groups owned 1425 // by the environment. 1426 func (e *environ) cleanEnvironmentSecurityGroups() error { 1427 jujuGroup := e.jujuGroupName() 1428 g, err := e.groupByName(jujuGroup) 1429 if isNotFoundError(err) { 1430 return nil 1431 } 1432 if err != nil { 1433 return errors.Annotatef(err, "cannot retrieve default security group: %q", jujuGroup) 1434 } 1435 if err := deleteSecurityGroupInsistently(e.ec2(), g, clock.WallClock); err != nil { 1436 return errors.Annotate(err, "cannot delete default security group") 1437 } 1438 return nil 1439 } 1440 1441 func (e *environ) terminateInstances(ids []instance.Id) error { 1442 if len(ids) == 0 { 1443 return nil 1444 } 1445 ec2inst := e.ec2() 1446 1447 // TODO (anastasiamac 2016-04-11) Err if instances still have resources hanging around. 1448 // LP#1568654 1449 defer func() { 1450 e.deleteSecurityGroupsForInstances(ids) 1451 }() 1452 1453 // TODO (anastasiamac 2016-04-7) instance termination would benefit 1454 // from retry with exponential delay just like security groups 1455 // in defer. Bug#1567179. 1456 var err error 1457 for a := shortAttempt.Start(); a.Next(); { 1458 _, err = terminateInstancesById(ec2inst, ids...) 1459 if err == nil || ec2ErrCode(err) != "InvalidInstanceID.NotFound" { 1460 // This will return either success at terminating all instances (1st condition) or 1461 // encountered error as long as it's not NotFound (2nd condition). 1462 return err 1463 } 1464 } 1465 1466 // We will get here only if we got a NotFound error. 1467 // 1. If we attempted to terminate only one instance was, return now. 1468 if len(ids) == 1 { 1469 ids = nil 1470 return nil 1471 } 1472 // 2. If we attempted to terminate several instances and got a NotFound error, 1473 // it means that no instances were terminated. 1474 // So try each instance individually, ignoring a NotFound error this time. 1475 deletedIDs := []instance.Id{} 1476 for _, id := range ids { 1477 _, err = terminateInstancesById(ec2inst, id) 1478 if err == nil { 1479 deletedIDs = append(deletedIDs, id) 1480 } 1481 if err != nil && ec2ErrCode(err) != "InvalidInstanceID.NotFound" { 1482 ids = deletedIDs 1483 return err 1484 } 1485 } 1486 // We will get here if all of the instances are deleted successfully, 1487 // or are not found, which implies they were previously deleted. 1488 ids = deletedIDs 1489 return nil 1490 } 1491 1492 var terminateInstancesById = func(ec2inst *ec2.EC2, ids ...instance.Id) (*ec2.TerminateInstancesResp, error) { 1493 strs := make([]string, len(ids)) 1494 for i, id := range ids { 1495 strs[i] = string(id) 1496 } 1497 return ec2inst.TerminateInstances(strs) 1498 } 1499 1500 func (e *environ) deleteSecurityGroupsForInstances(ids []instance.Id) { 1501 if len(ids) == 0 { 1502 logger.Debugf("no need to delete security groups: no intances were terminated successfully") 1503 return 1504 } 1505 1506 // We only want to attempt deleting security groups for the 1507 // instances that have been successfully terminated. 1508 securityGroups, err := e.instanceSecurityGroups(ids, "shutting-down", "terminated") 1509 if err != nil { 1510 logger.Warningf("cannot determine security groups to delete: %v", err) 1511 } 1512 1513 // TODO(perrito666) we need to tag global security groups to be able 1514 // to tell them apart from future groups that are neither machine 1515 // nor environment group. 1516 // https://bugs.launchpad.net/juju-core/+bug/1534289 1517 jujuGroup := e.jujuGroupName() 1518 1519 ec2inst := e.ec2() 1520 for _, deletable := range securityGroups { 1521 if deletable.Name == jujuGroup { 1522 continue 1523 } 1524 if err := deleteSecurityGroupInsistently(ec2inst, deletable, clock.WallClock); err != nil { 1525 // In ideal world, we would err out here. 1526 // However: 1527 // 1. We do not know if all instances have been terminated. 1528 // If some instances erred out, they may still be using this security group. 1529 // In this case, our failure to delete security group is reasonable: it's still in use. 1530 // 2. Some security groups may be shared by multiple instances, 1531 // for example, global firewalling. We should not delete these. 1532 logger.Warningf("provider failure: %v", err) 1533 } 1534 } 1535 } 1536 1537 // SecurityGroupCleaner defines provider instance methods needed to delete 1538 // a security group. 1539 type SecurityGroupCleaner interface { 1540 1541 // DeleteSecurityGroup deletes security group on the provider. 1542 DeleteSecurityGroup(group ec2.SecurityGroup) (resp *ec2.SimpleResp, err error) 1543 } 1544 1545 var deleteSecurityGroupInsistently = func(inst SecurityGroupCleaner, group ec2.SecurityGroup, clock clock.Clock) error { 1546 var lastErr error 1547 err := retry.Call(retry.CallArgs{ 1548 Attempts: 30, 1549 Delay: time.Second, 1550 MaxDelay: time.Minute, // because 2**29 seconds is beyond reasonable 1551 BackoffFunc: retry.DoubleDelay, 1552 Clock: clock, 1553 Func: func() error { 1554 _, err := inst.DeleteSecurityGroup(group) 1555 if err == nil || isNotFoundError(err) { 1556 return nil 1557 } 1558 return errors.Trace(err) 1559 }, 1560 NotifyFunc: func(err error, attempt int) { 1561 lastErr = err 1562 logger.Infof(fmt.Sprintf("deleting security group %q, attempt %d", group.Name, attempt)) 1563 }, 1564 }) 1565 if err != nil { 1566 logger.Warningf("cannot delete security group %q: consider deleting it manually", group.Name) 1567 return lastErr 1568 } 1569 return nil 1570 } 1571 1572 func (e *environ) addModelFilter(f *ec2.Filter) { 1573 f.Add(fmt.Sprintf("tag:%s", tags.JujuModel), e.uuid()) 1574 } 1575 1576 func (e *environ) addControllerFilter(f *ec2.Filter) { 1577 f.Add(fmt.Sprintf("tag:%s", tags.JujuController), e.Config().ControllerUUID()) 1578 } 1579 1580 func (e *environ) uuid() string { 1581 return e.Config().UUID() 1582 } 1583 1584 func (e *environ) globalGroupName() string { 1585 return fmt.Sprintf("%s-global", e.jujuGroupName()) 1586 } 1587 1588 func (e *environ) machineGroupName(machineId string) string { 1589 return fmt.Sprintf("%s-%s", e.jujuGroupName(), machineId) 1590 } 1591 1592 func (e *environ) jujuGroupName() string { 1593 return "juju-" + e.uuid() 1594 } 1595 1596 // setUpGroups creates the security groups for the new machine, and 1597 // returns them. 1598 // 1599 // Instances are tagged with a group so they can be distinguished from 1600 // other instances that might be running on the same EC2 account. In 1601 // addition, a specific machine security group is created for each 1602 // machine, so that its firewall rules can be configured per machine. 1603 func (e *environ) setUpGroups(machineId string, apiPort int) ([]ec2.SecurityGroup, error) { 1604 1605 // Ensure there's a global group for Juju-related traffic. 1606 jujuGroup, err := e.ensureGroup(e.jujuGroupName(), 1607 []ec2.IPPerm{{ 1608 Protocol: "tcp", 1609 FromPort: 22, 1610 ToPort: 22, 1611 SourceIPs: []string{"0.0.0.0/0"}, 1612 }, { 1613 Protocol: "tcp", 1614 FromPort: apiPort, 1615 ToPort: apiPort, 1616 SourceIPs: []string{"0.0.0.0/0"}, 1617 }, { 1618 Protocol: "tcp", 1619 FromPort: 0, 1620 ToPort: 65535, 1621 }, { 1622 Protocol: "udp", 1623 FromPort: 0, 1624 ToPort: 65535, 1625 }, { 1626 Protocol: "icmp", 1627 FromPort: -1, 1628 ToPort: -1, 1629 }}, 1630 ) 1631 if err != nil { 1632 return nil, err 1633 } 1634 1635 var machineGroup ec2.SecurityGroup 1636 switch e.Config().FirewallMode() { 1637 case config.FwInstance: 1638 machineGroup, err = e.ensureGroup(e.machineGroupName(machineId), nil) 1639 case config.FwGlobal: 1640 machineGroup, err = e.ensureGroup(e.globalGroupName(), nil) 1641 } 1642 if err != nil { 1643 return nil, err 1644 } 1645 return []ec2.SecurityGroup{jujuGroup, machineGroup}, nil 1646 } 1647 1648 // zeroGroup holds the zero security group. 1649 var zeroGroup ec2.SecurityGroup 1650 1651 // securityGroupsByNameOrID calls ec2.SecurityGroups() either with the given 1652 // groupName or with filter by vpc-id and group-name, depending on whether 1653 // vpc-id is empty or not. 1654 func (e *environ) securityGroupsByNameOrID(groupName string) (*ec2.SecurityGroupsResp, error) { 1655 var ( 1656 filter *ec2.Filter 1657 groups []ec2.SecurityGroup 1658 ) 1659 1660 chosenVPCID := e.ecfg().vpcID() 1661 if chosenVPCID != "" { 1662 // AWS VPC API requires both of these filters (and no 1663 // group names/ids set) for non-default EC2-VPC groups: 1664 filter = ec2.NewFilter() 1665 filter.Add("vpc-id", chosenVPCID) 1666 filter.Add("group-name", groupName) 1667 } else { 1668 // EC2-Classic or EC2-VPC with implicit default VPC need to use the 1669 // GroupName.X arguments instead of the filters. 1670 groups = ec2.SecurityGroupNames(groupName) 1671 } 1672 1673 return e.ec2().SecurityGroups(groups, filter) 1674 } 1675 1676 // ensureGroup returns the security group with name and perms. 1677 // If a group with name does not exist, one will be created. 1678 // If it exists, its permissions are set to perms. 1679 // Any entries in perms without SourceIPs will be granted for 1680 // the named group only. 1681 func (e *environ) ensureGroup(name string, perms []ec2.IPPerm) (g ec2.SecurityGroup, err error) { 1682 // Specify explicit VPC ID if needed (not for default VPC). 1683 chosenVPCID := e.ecfg().vpcID() 1684 1685 ec2inst := e.ec2() 1686 resp, err := ec2inst.CreateSecurityGroup(chosenVPCID, name, "juju group") 1687 if err != nil && ec2ErrCode(err) != "InvalidGroup.Duplicate" { 1688 err = errors.Annotatef(err, "creating security group %q (in VPC %q)", name, chosenVPCID) 1689 return zeroGroup, err 1690 } 1691 1692 var have permSet 1693 if err == nil { 1694 g = resp.SecurityGroup 1695 // Tag the created group with the model and controller UUIDs. 1696 cfg := e.Config() 1697 tags := tags.ResourceTags( 1698 names.NewModelTag(cfg.UUID()), 1699 names.NewModelTag(cfg.ControllerUUID()), 1700 cfg, 1701 ) 1702 if err := tagResources(ec2inst, tags, g.Id); err != nil { 1703 return g, errors.Annotate(err, "tagging security group") 1704 } 1705 logger.Debugf("created security group %q in VPC %q with ID %q", name, chosenVPCID, g.Id) 1706 } else { 1707 resp, err := e.securityGroupsByNameOrID(name) 1708 if err != nil { 1709 err = errors.Annotatef(err, "fetching security group %q (in VPC %q)", name, chosenVPCID) 1710 return zeroGroup, err 1711 } 1712 if len(resp.Groups) == 0 { 1713 return zeroGroup, errors.NotFoundf("security group %q in VPC %q", name, chosenVPCID) 1714 } 1715 info := resp.Groups[0] 1716 // It's possible that the old group has the wrong 1717 // description here, but if it does it's probably due 1718 // to something deliberately playing games with juju, 1719 // so we ignore it. 1720 g = info.SecurityGroup 1721 have = newPermSetForGroup(info.IPPerms, g) 1722 } 1723 1724 want := newPermSetForGroup(perms, g) 1725 revoke := make(permSet) 1726 for p := range have { 1727 if !want[p] { 1728 revoke[p] = true 1729 } 1730 } 1731 if len(revoke) > 0 { 1732 _, err := ec2inst.RevokeSecurityGroup(g, revoke.ipPerms()) 1733 if err != nil { 1734 err = errors.Annotatef(err, "revoking security group %q (in VPC %q)", g.Id, chosenVPCID) 1735 return zeroGroup, err 1736 } 1737 } 1738 1739 add := make(permSet) 1740 for p := range want { 1741 if !have[p] { 1742 add[p] = true 1743 } 1744 } 1745 if len(add) > 0 { 1746 _, err := ec2inst.AuthorizeSecurityGroup(g, add.ipPerms()) 1747 if err != nil { 1748 err = errors.Annotatef(err, "authorizing security group %q (in VPC %q)", g.Id, chosenVPCID) 1749 return zeroGroup, err 1750 } 1751 } 1752 return g, nil 1753 } 1754 1755 // permKey represents a permission for a group or an ip address range to access 1756 // the given range of ports. Only one of groupId or ipAddr should be non-empty. 1757 type permKey struct { 1758 protocol string 1759 fromPort int 1760 toPort int 1761 groupId string 1762 ipAddr string 1763 } 1764 1765 type permSet map[permKey]bool 1766 1767 // newPermSetForGroup returns a set of all the permissions in the 1768 // given slice of IPPerms. It ignores the name and owner 1769 // id in source groups, and any entry with no source ips will 1770 // be granted for the given group only. 1771 func newPermSetForGroup(ps []ec2.IPPerm, group ec2.SecurityGroup) permSet { 1772 m := make(permSet) 1773 for _, p := range ps { 1774 k := permKey{ 1775 protocol: p.Protocol, 1776 fromPort: p.FromPort, 1777 toPort: p.ToPort, 1778 } 1779 if len(p.SourceIPs) > 0 { 1780 for _, ip := range p.SourceIPs { 1781 k.ipAddr = ip 1782 m[k] = true 1783 } 1784 } else { 1785 k.groupId = group.Id 1786 m[k] = true 1787 } 1788 } 1789 return m 1790 } 1791 1792 // ipPerms returns m as a slice of permissions usable 1793 // with the ec2 package. 1794 func (m permSet) ipPerms() (ps []ec2.IPPerm) { 1795 // We could compact the permissions, but it 1796 // hardly seems worth it. 1797 for p := range m { 1798 ipp := ec2.IPPerm{ 1799 Protocol: p.protocol, 1800 FromPort: p.fromPort, 1801 ToPort: p.toPort, 1802 } 1803 if p.ipAddr != "" { 1804 ipp.SourceIPs = []string{p.ipAddr} 1805 } else { 1806 ipp.SourceGroups = []ec2.UserSecurityGroup{{Id: p.groupId}} 1807 } 1808 ps = append(ps, ipp) 1809 } 1810 return 1811 } 1812 1813 func isZoneOrSubnetConstrainedError(err error) bool { 1814 return isZoneConstrainedError(err) || isSubnetConstrainedError(err) 1815 } 1816 1817 // isZoneConstrainedError reports whether or not the error indicates 1818 // RunInstances failed due to the specified availability zone being 1819 // constrained for the instance type being provisioned, or is 1820 // otherwise unusable for the specific request made. 1821 func isZoneConstrainedError(err error) bool { 1822 switch err := err.(type) { 1823 case *ec2.Error: 1824 switch err.Code { 1825 case "Unsupported", "InsufficientInstanceCapacity": 1826 // A big hammer, but we've now seen several different error messages 1827 // for constrained zones, and who knows how many more there might 1828 // be. If the message contains "Availability Zone", it's a fair 1829 // bet that it's constrained or otherwise unusable. 1830 return strings.Contains(err.Message, "Availability Zone") 1831 case "InvalidInput": 1832 // If the region has a default VPC, then we will receive an error 1833 // if the AZ does not have a default subnet. Until we have proper 1834 // support for networks, we'll skip over these. 1835 return strings.HasPrefix(err.Message, "No default subnet for availability zone") 1836 case "VolumeTypeNotAvailableInZone": 1837 return true 1838 } 1839 } 1840 return false 1841 } 1842 1843 // isSubnetConstrainedError reports whether or not the error indicates 1844 // RunInstances failed due to the specified VPC subnet ID being constrained for 1845 // the instance type being provisioned, or is otherwise unusable for the 1846 // specific request made. 1847 func isSubnetConstrainedError(err error) bool { 1848 switch err := err.(type) { 1849 case *ec2.Error: 1850 switch err.Code { 1851 case "InsufficientFreeAddressesInSubnet", "InsufficientInstanceCapacity": 1852 // Subnet and/or VPC general limits reached. 1853 return true 1854 case "InvalidSubnetID.NotFound": 1855 // This shouldn't happen, as we validate the subnet IDs, but it can 1856 // happen if the user manually deleted the subnet outside of Juju. 1857 return true 1858 } 1859 } 1860 return false 1861 } 1862 1863 // If the err is of type *ec2.Error, ec2ErrCode returns 1864 // its code, otherwise it returns the empty string. 1865 func ec2ErrCode(err error) string { 1866 ec2err, _ := errors.Cause(err).(*ec2.Error) 1867 if ec2err == nil { 1868 return "" 1869 } 1870 return ec2err.Code 1871 } 1872 1873 func (e *environ) AllocateContainerAddresses(hostInstanceID instance.Id, preparedInfo []network.InterfaceInfo) ([]network.InterfaceInfo, error) { 1874 return nil, errors.NotSupportedf("container address allocation") 1875 }