// Source: github.com/mhilton/juju-juju@v0.0.0-20150901100907-a94dd2c73455/provider/azure/environ.go

// Copyright 2013 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package azure

import (
	"encoding/base64"
	"fmt"
	"net/http"
	"regexp"
	"strings"
	"sync"
	"time"

	"github.com/juju/errors"
	"github.com/juju/utils"
	"github.com/juju/utils/set"
	"launchpad.net/gwacl"

	"github.com/juju/juju/cloudconfig/instancecfg"
	"github.com/juju/juju/constraints"
	"github.com/juju/juju/environs"
	"github.com/juju/juju/environs/config"
	"github.com/juju/juju/environs/imagemetadata"
	"github.com/juju/juju/environs/instances"
	"github.com/juju/juju/environs/simplestreams"
	"github.com/juju/juju/environs/storage"
	"github.com/juju/juju/instance"
	"github.com/juju/juju/network"
	"github.com/juju/juju/provider/common"
	"github.com/juju/juju/state"
	"github.com/juju/juju/state/multiwatcher"
)

const (
	// deploymentSlot says in which slot to deploy instances.  Azure
	// supports 'Production' or 'Staging'.
	// This provider always deploys to Production.  Think twice about
	// changing that: DNS names in the staging slot work differently from
	// those in the production slot.  In Staging, Azure assigns an
	// arbitrary hostname that we can then extract from the deployment's
	// URL.  In Production, the hostname in the deployment URL does not
	// actually seem to resolve; instead, the service name is used as the
	// DNS name, with ".cloudapp.net" appended.
	deploymentSlot = "Production"

	// networkDefinition is the address space of the virtual network
	// used by the nodes in this environment, in CIDR notation.  This is
	// the network used for machine-to-machine communication.
	networkDefinition = "10.0.0.0/8"

	// stateServerLabel is the label applied to the cloud service created
	// for state servers.
	stateServerLabel = "juju-state-server"
)

// vars for testing purposes.
58 var ( 59 createInstance = (*azureEnviron).createInstance 60 ) 61 62 type azureEnviron struct { 63 // Except where indicated otherwise, all fields in this object should 64 // only be accessed using a lock or a snapshot. 65 sync.Mutex 66 67 // archMutex gates access to supportedArchitectures 68 archMutex sync.Mutex 69 // supportedArchitectures caches the architectures 70 // for which images can be instantiated. 71 supportedArchitectures []string 72 73 // ecfg is the environment's Azure-specific configuration. 74 ecfg *azureEnvironConfig 75 76 // storage is this environ's own private storage. 77 storage storage.Storage 78 79 // storageAccountKey holds an access key to this environment's 80 // private storage. This is automatically queried from Azure on 81 // startup. 82 storageAccountKey string 83 84 // api is a management API for Microsoft Azure. 85 api *gwacl.ManagementAPI 86 87 // vnet describes the configured virtual network. 88 vnet *gwacl.VirtualNetworkSite 89 90 // availableRoleSizes is the role sizes available in the configured 91 // location. This will be reset whenever the location configuration changes. 92 availableRoleSizes set.Strings 93 } 94 95 // azureEnviron implements Environ and HasRegion. 96 var _ environs.Environ = (*azureEnviron)(nil) 97 var _ simplestreams.HasRegion = (*azureEnviron)(nil) 98 var _ state.Prechecker = (*azureEnviron)(nil) 99 100 // NewEnviron creates a new azureEnviron. 101 func NewEnviron(cfg *config.Config) (*azureEnviron, error) { 102 var env azureEnviron 103 err := env.SetConfig(cfg) 104 if err != nil { 105 return nil, err 106 } 107 108 // Set up storage. 109 env.storage = &azureStorage{ 110 storageContext: &environStorageContext{environ: &env}, 111 } 112 return &env, nil 113 } 114 115 // extractStorageKey returns the primary account key from a gwacl 116 // StorageAccountKeys struct, or if there is none, the secondary one. 
117 func extractStorageKey(keys *gwacl.StorageAccountKeys) string { 118 if keys.Primary != "" { 119 return keys.Primary 120 } 121 return keys.Secondary 122 } 123 124 // queryStorageAccountKey retrieves the storage account's key from Azure. 125 func (env *azureEnviron) queryStorageAccountKey() (string, error) { 126 snap := env.getSnapshot() 127 128 accountName := snap.ecfg.storageAccountName() 129 keys, err := snap.api.GetStorageAccountKeys(accountName) 130 if err != nil { 131 return "", errors.Annotate(err, "cannot obtain storage account keys") 132 } 133 134 key := extractStorageKey(keys) 135 if key == "" { 136 return "", errors.New("no keys available for storage account") 137 } 138 139 return key, nil 140 } 141 142 // getSnapshot produces an atomic shallow copy of the environment object. 143 // Whenever you need to access the environment object's fields without 144 // modifying them, get a snapshot and read its fields instead. You will 145 // get a consistent view of the fields without any further locking. 146 // If you do need to modify the environment's fields, do not get a snapshot 147 // but lock the object throughout the critical section. 148 func (env *azureEnviron) getSnapshot() *azureEnviron { 149 env.Lock() 150 defer env.Unlock() 151 152 // Copy the environment. (Not the pointer, the environment itself.) 153 // This is a shallow copy. 154 snap := *env 155 // Reset the snapshot's mutex, because we just copied it while we 156 // were holding it. The snapshot will have a "clean," unlocked mutex. 157 snap.Mutex = sync.Mutex{} 158 return &snap 159 } 160 161 // getAffinityGroupName returns the name of the affinity group used by all 162 // the Services in this environment. The affinity group name is immutable, 163 // so there is no need to use a configuration snapshot. 164 func (env *azureEnviron) getAffinityGroupName() string { 165 return env.getEnvPrefix() + "ag" 166 } 167 168 // getLocation gets the configured Location for the environment. 
169 func (env *azureEnviron) getLocation() string { 170 snap := env.getSnapshot() 171 return snap.ecfg.location() 172 } 173 174 func (env *azureEnviron) createAffinityGroup() error { 175 snap := env.getSnapshot() 176 affinityGroupName := env.getAffinityGroupName() 177 location := snap.ecfg.location() 178 cag := gwacl.NewCreateAffinityGroup(affinityGroupName, affinityGroupName, affinityGroupName, location) 179 return snap.api.CreateAffinityGroup(&gwacl.CreateAffinityGroupRequest{ 180 CreateAffinityGroup: cag, 181 }) 182 } 183 184 func (env *azureEnviron) deleteAffinityGroup() error { 185 snap := env.getSnapshot() 186 affinityGroupName := env.getAffinityGroupName() 187 return snap.api.DeleteAffinityGroup(&gwacl.DeleteAffinityGroupRequest{ 188 Name: affinityGroupName, 189 }) 190 } 191 192 // getAvailableRoleSizes returns the role sizes available for the configured 193 // location. 194 func (env *azureEnviron) getAvailableRoleSizes() (_ set.Strings, err error) { 195 defer errors.DeferredAnnotatef(&err, "cannot get available role sizes") 196 197 snap := env.getSnapshot() 198 if snap.availableRoleSizes != nil { 199 return snap.availableRoleSizes, nil 200 } 201 locations, err := snap.api.ListLocations() 202 if err != nil { 203 return nil, errors.Annotate(err, "cannot list locations") 204 } 205 var available set.Strings 206 for _, location := range locations { 207 if location.Name != snap.ecfg.location() { 208 continue 209 } 210 if location.ComputeCapabilities == nil { 211 return nil, errors.Annotate(err, "cannot determine compute capabilities") 212 } 213 available = set.NewStrings(location.ComputeCapabilities.VirtualMachineRoleSizes...) 214 break 215 } 216 if available == nil { 217 return nil, errors.NotFoundf("location %q", snap.ecfg.location()) 218 } 219 env.Lock() 220 env.availableRoleSizes = available 221 env.Unlock() 222 return available, nil 223 } 224 225 // getVirtualNetworkName returns the name of the virtual network used by all 226 // the VMs in this environment. 
The virtual network name is immutable, 227 // so there is no need to use a configuration snapshot. 228 func (env *azureEnviron) getVirtualNetworkName() string { 229 return env.getEnvPrefix() + "vnet" 230 } 231 232 // getVirtualNetwork returns the virtual network used by all the VMs in this 233 // environment. 234 func (env *azureEnviron) getVirtualNetwork() (*gwacl.VirtualNetworkSite, error) { 235 snap := env.getSnapshot() 236 if snap.vnet != nil { 237 return snap.vnet, nil 238 } 239 cfg, err := env.api.GetNetworkConfiguration() 240 if err != nil { 241 return nil, errors.Annotate(err, "error getting network configuration") 242 } 243 var vnet *gwacl.VirtualNetworkSite 244 vnetName := env.getVirtualNetworkName() 245 if cfg != nil && cfg.VirtualNetworkSites != nil { 246 for _, site := range *cfg.VirtualNetworkSites { 247 if site.Name == vnetName { 248 vnet = &site 249 break 250 } 251 } 252 } 253 if vnet == nil { 254 return nil, errors.NotFoundf("virtual network %q", vnetName) 255 } 256 env.Lock() 257 env.vnet = vnet 258 env.Unlock() 259 return vnet, nil 260 } 261 262 // createVirtualNetwork creates a virtual network for the environment. 263 func (env *azureEnviron) createVirtualNetwork() error { 264 snap := env.getSnapshot() 265 vnetName := env.getVirtualNetworkName() 266 virtualNetwork := gwacl.VirtualNetworkSite{ 267 Name: vnetName, 268 Location: snap.ecfg.location(), 269 AddressSpacePrefixes: []string{ 270 networkDefinition, 271 }, 272 } 273 if err := snap.api.AddVirtualNetworkSite(&virtualNetwork); err != nil { 274 return errors.Trace(err) 275 } 276 env.Lock() 277 env.vnet = &virtualNetwork 278 env.Unlock() 279 return nil 280 } 281 282 // deleteVnetAttempt is an AttemptyStrategy for use 283 // when attempting delete a virtual network. This is 284 // necessary as Azure apparently does not release all 285 // references to the vnet even when all cloud services 286 // are deleted. 
287 var deleteVnetAttempt = utils.AttemptStrategy{ 288 Total: 30 * time.Second, 289 Delay: 1 * time.Second, 290 } 291 292 var networkInUse = regexp.MustCompile(".*The virtual network .* is currently in use.*") 293 294 func (env *azureEnviron) deleteVirtualNetwork() error { 295 snap := env.getSnapshot() 296 vnetName := env.getVirtualNetworkName() 297 var err error 298 for a := deleteVnetAttempt.Start(); a.Next(); { 299 err = snap.api.RemoveVirtualNetworkSite(vnetName) 300 if err == nil { 301 return nil 302 } 303 if err, ok := err.(*gwacl.AzureError); ok { 304 if err.StatusCode() == 400 && networkInUse.MatchString(err.Message) { 305 // Retry on "virtual network XYZ is currently in use". 306 continue 307 } 308 } 309 // Any other error should be returned. 310 break 311 } 312 return err 313 } 314 315 // getContainerName returns the name of the private storage account container 316 // that this environment is using. The container name is immutable, 317 // so there is no need to use a configuration snapshot. 318 func (env *azureEnviron) getContainerName() string { 319 return env.getEnvPrefix() + "private" 320 } 321 322 func isHTTPConflict(err error) bool { 323 if err, ok := err.(gwacl.HTTPError); ok { 324 return err.StatusCode() == http.StatusConflict 325 } 326 return false 327 } 328 329 func isVirtualNetworkExist(err error) bool { 330 // TODO(axw) 2014-06-16 #1330473 331 // Add an error type to gwacl for this. 332 s := err.Error() 333 const prefix = "could not add virtual network" 334 const suffix = "already exists" 335 return strings.HasPrefix(s, prefix) && strings.HasSuffix(s, suffix) 336 } 337 338 // Bootstrap is specified in the Environ interface. 339 func (env *azureEnviron) Bootstrap(ctx environs.BootstrapContext, args environs.BootstrapParams) (arch, series string, _ environs.BootstrapFinalizer, err error) { 340 // The creation of the affinity group and the virtual network is specific to the Azure provider. 
341 err = env.createAffinityGroup() 342 if err != nil && !isHTTPConflict(err) { 343 return "", "", nil, err 344 } 345 // If we fail after this point, clean up the affinity group. 346 defer func() { 347 if err != nil { 348 env.deleteAffinityGroup() 349 } 350 }() 351 352 err = env.createVirtualNetwork() 353 if err != nil && !isVirtualNetworkExist(err) { 354 return "", "", nil, err 355 } 356 // If we fail after this point, clean up the virtual network. 357 defer func() { 358 if err != nil { 359 env.deleteVirtualNetwork() 360 } 361 }() 362 return common.Bootstrap(ctx, env, args) 363 } 364 365 // isLegacyInstance reports whether the instance is a 366 // legacy instance (i.e. one-to-one cloud service to instance). 367 func isLegacyInstance(inst *azureInstance) (bool, error) { 368 snap := inst.environ.getSnapshot() 369 serviceName := inst.hostedService.ServiceName 370 service, err := snap.api.GetHostedServiceProperties(serviceName, true) 371 if err != nil { 372 return false, err 373 } else if len(service.Deployments) != 1 { 374 return false, nil 375 } 376 deploymentName := service.Deployments[0].Name 377 return deploymentName == deploymentNameV1(serviceName), nil 378 } 379 380 // StateServerInstances is specified in the Environ interface. 381 func (env *azureEnviron) StateServerInstances() ([]instance.Id, error) { 382 // Locate the state-server cloud service, and get its addresses. 
383 instances, err := env.AllInstances() 384 if err != nil { 385 return nil, err 386 } 387 var stateServerInstanceIds []instance.Id 388 var loadStateFile bool 389 for _, inst := range instances { 390 azureInstance := inst.(*azureInstance) 391 label := azureInstance.hostedService.Label 392 if decoded, err := base64.StdEncoding.DecodeString(label); err == nil { 393 if string(decoded) == stateServerLabel { 394 stateServerInstanceIds = append(stateServerInstanceIds, inst.Id()) 395 continue 396 } 397 } 398 if !loadStateFile { 399 _, roleName := env.splitInstanceId(azureInstance.Id()) 400 if roleName == "" { 401 loadStateFile = true 402 } 403 } 404 } 405 if loadStateFile { 406 // Some legacy instances were found, so we must load provider-state 407 // to find which instance was the original state server. If we find 408 // a legacy environment, then stateServerInstanceIds will not contain 409 // the original bootstrap instance, which is the only one that will 410 // be in provider-state. 411 instanceIds, err := common.ProviderStateInstances(env, env.Storage()) 412 if err != nil { 413 return nil, err 414 } 415 stateServerInstanceIds = append(stateServerInstanceIds, instanceIds...) 416 } 417 if len(stateServerInstanceIds) == 0 { 418 return nil, environs.ErrNoInstances 419 } 420 return stateServerInstanceIds, nil 421 } 422 423 // Config is specified in the Environ interface. 424 func (env *azureEnviron) Config() *config.Config { 425 snap := env.getSnapshot() 426 return snap.ecfg.Config 427 } 428 429 // SetConfig is specified in the Environ interface. 
430 func (env *azureEnviron) SetConfig(cfg *config.Config) error { 431 var oldLocation string 432 if snap := env.getSnapshot(); snap.ecfg != nil { 433 oldLocation = snap.ecfg.location() 434 } 435 436 ecfg, err := azureEnvironProvider{}.newConfig(cfg) 437 if err != nil { 438 return err 439 } 440 441 env.Lock() 442 defer env.Unlock() 443 444 if env.ecfg != nil { 445 _, err = azureEnvironProvider{}.Validate(cfg, env.ecfg.Config) 446 if err != nil { 447 return err 448 } 449 } 450 451 env.ecfg = ecfg 452 453 // Reset storage account key. Even if we had one before, it may not 454 // be appropriate for the new config. 455 env.storageAccountKey = "" 456 457 subscription := ecfg.managementSubscriptionId() 458 certKeyPEM := []byte(ecfg.managementCertificate()) 459 location := ecfg.location() 460 mgtAPI, err := gwacl.NewManagementAPICertDataWithRetryPolicy(subscription, certKeyPEM, certKeyPEM, location, retryPolicy) 461 if err != nil { 462 return errors.Annotate(err, "cannot acquire management API") 463 } 464 env.api = mgtAPI 465 466 // If the location changed, reset the available role sizes. 467 if location != oldLocation { 468 env.availableRoleSizes = nil 469 } 470 471 return nil 472 } 473 474 // attemptCreateService tries to create a new hosted service on Azure, with a 475 // name it chooses (based on the given prefix), but recognizes that the name 476 // may not be available. If the name is not available, it does not treat that 477 // as an error but just returns nil. 478 func attemptCreateService(azure *gwacl.ManagementAPI, prefix, affinityGroupName, label string) (*gwacl.CreateHostedService, error) { 479 var err error 480 name := gwacl.MakeRandomHostedServiceName(prefix) 481 err = azure.CheckHostedServiceNameAvailability(name) 482 if err != nil { 483 // The calling function should retry. 
484 return nil, nil 485 } 486 if label == "" { 487 label = name 488 } 489 req := gwacl.NewCreateHostedServiceWithLocation(name, label, "") 490 req.AffinityGroup = affinityGroupName 491 err = azure.AddHostedService(req) 492 if err != nil { 493 return nil, err 494 } 495 return req, nil 496 } 497 498 // newHostedService creates a hosted service. It will make up a unique name, 499 // starting with the given prefix. 500 func newHostedService(azure *gwacl.ManagementAPI, prefix, affinityGroupName, label string) (*gwacl.HostedService, error) { 501 var err error 502 var createdService *gwacl.CreateHostedService 503 for tries := 10; tries > 0 && err == nil && createdService == nil; tries-- { 504 createdService, err = attemptCreateService(azure, prefix, affinityGroupName, label) 505 } 506 if err != nil { 507 return nil, errors.Annotate(err, "could not create hosted service") 508 } 509 if createdService == nil { 510 return nil, fmt.Errorf("could not come up with a unique hosted service name - is your randomizer initialized?") 511 } 512 return azure.GetHostedServiceProperties(createdService.ServiceName, true) 513 } 514 515 // SupportedArchitectures is specified on the EnvironCapability interface. 516 func (env *azureEnviron) SupportedArchitectures() ([]string, error) { 517 env.archMutex.Lock() 518 defer env.archMutex.Unlock() 519 if env.supportedArchitectures != nil { 520 return env.supportedArchitectures, nil 521 } 522 // Create a filter to get all images from our region and for the correct stream. 
523 ecfg := env.getSnapshot().ecfg 524 region := ecfg.location() 525 cloudSpec := simplestreams.CloudSpec{ 526 Region: region, 527 Endpoint: getEndpoint(region), 528 } 529 imageConstraint := imagemetadata.NewImageConstraint(simplestreams.LookupParams{ 530 CloudSpec: cloudSpec, 531 Stream: ecfg.ImageStream(), 532 }) 533 var err error 534 env.supportedArchitectures, err = common.SupportedArchitectures(env, imageConstraint) 535 return env.supportedArchitectures, err 536 } 537 538 // selectInstanceTypeAndImage returns the appropriate instances.InstanceType and 539 // the OS image name for launching a virtual machine with the given parameters. 540 func (env *azureEnviron) selectInstanceTypeAndImage(constraint *instances.InstanceConstraint) (*instances.InstanceType, string, error) { 541 ecfg := env.getSnapshot().ecfg 542 sourceImageName := ecfg.forceImageName() 543 if sourceImageName != "" { 544 // Configuration forces us to use a specific image. There may 545 // not be a suitable image in the simplestreams database. 546 // This means we can't use Juju's normal selection mechanism, 547 // because it combines instance-type and image selection: if 548 // there are no images we can use, it won't offer us an 549 // instance type either. 550 // 551 // Select the instance type using simple, Azure-specific code. 552 instanceType, err := selectMachineType(env, defaultToBaselineSpec(constraint.Constraints)) 553 if err != nil { 554 return nil, "", err 555 } 556 return instanceType, sourceImageName, nil 557 } 558 559 // Choose the most suitable instance type and OS image, based on simplestreams information. 560 spec, err := findInstanceSpec(env, constraint) 561 if err != nil { 562 return nil, "", err 563 } 564 return &spec.InstanceType, spec.Image.Id, nil 565 } 566 567 var unsupportedConstraints = []string{ 568 constraints.CpuPower, 569 constraints.Tags, 570 } 571 572 // ConstraintsValidator is defined on the Environs interface. 
573 func (env *azureEnviron) ConstraintsValidator() (constraints.Validator, error) { 574 validator := constraints.NewValidator() 575 validator.RegisterUnsupported(unsupportedConstraints) 576 supportedArches, err := env.SupportedArchitectures() 577 if err != nil { 578 return nil, err 579 } 580 validator.RegisterVocabulary(constraints.Arch, supportedArches) 581 582 instanceTypes, err := listInstanceTypes(env) 583 if err != nil { 584 return nil, err 585 } 586 instTypeNames := make([]string, len(instanceTypes)) 587 for i, instanceType := range instanceTypes { 588 instTypeNames[i] = instanceType.Name 589 } 590 validator.RegisterVocabulary(constraints.InstanceType, instTypeNames) 591 validator.RegisterConflicts( 592 []string{constraints.InstanceType}, 593 []string{constraints.Mem, constraints.CpuCores, constraints.Arch, constraints.RootDisk}) 594 595 return validator, nil 596 } 597 598 // PrecheckInstance is defined on the state.Prechecker interface. 599 func (env *azureEnviron) PrecheckInstance(series string, cons constraints.Value, placement string) error { 600 if placement != "" { 601 return fmt.Errorf("unknown placement directive: %s", placement) 602 } 603 if !cons.HasInstanceType() { 604 return nil 605 } 606 // Constraint has an instance-type constraint so let's see if it is valid. 607 instanceTypes, err := listInstanceTypes(env) 608 if err != nil { 609 return err 610 } 611 for _, instanceType := range instanceTypes { 612 if instanceType.Name == *cons.InstanceType { 613 return nil 614 } 615 } 616 return fmt.Errorf("invalid instance type %q", *cons.InstanceType) 617 } 618 619 // createInstance creates all of the Azure entities necessary for a 620 // new instance. This includes Cloud Service, Deployment and Role. 621 // 622 // If serviceName is non-empty, then createInstance will assign to 623 // the Cloud Service with that name. Otherwise, a new Cloud Service 624 // will be created. 
625 func (env *azureEnviron) createInstance(azure *gwacl.ManagementAPI, role *gwacl.Role, serviceName string, stateServer bool) (resultInst instance.Instance, resultErr error) { 626 var inst instance.Instance 627 defer func() { 628 if inst != nil && resultErr != nil { 629 if err := env.StopInstances(inst.Id()); err != nil { 630 // Failure upon failure. Log it, but return the original error. 631 logger.Errorf("error releasing failed instance: %v", err) 632 } 633 } 634 }() 635 var err error 636 var service *gwacl.HostedService 637 if serviceName != "" { 638 logger.Debugf("creating instance in existing cloud service %q", serviceName) 639 service, err = azure.GetHostedServiceProperties(serviceName, true) 640 } else { 641 logger.Debugf("creating instance in new cloud service") 642 // If we're creating a cloud service for state servers, 643 // we will want to open additional ports. We need to 644 // record this against the cloud service, so we use a 645 // special label for the purpose. 646 var label string 647 if stateServer { 648 label = stateServerLabel 649 } 650 service, err = newHostedService(azure, env.getEnvPrefix(), env.getAffinityGroupName(), label) 651 } 652 if err != nil { 653 return nil, err 654 } 655 if len(service.Deployments) == 0 { 656 // This is a newly created cloud service, so we 657 // should destroy it if anything below fails. 658 defer func() { 659 if resultErr != nil { 660 azure.DeleteHostedService(service.ServiceName) 661 // Destroying the hosted service destroys the instance, 662 // so ensure StopInstances isn't called. 663 inst = nil 664 } 665 }() 666 // Create an initial deployment. 
667 deployment := gwacl.NewDeploymentForCreateVMDeployment( 668 deploymentNameV2(service.ServiceName), 669 deploymentSlot, 670 deploymentNameV2(service.ServiceName), 671 []gwacl.Role{*role}, 672 env.getVirtualNetworkName(), 673 ) 674 if err := azure.AddDeployment(deployment, service.ServiceName); err != nil { 675 return nil, errors.Annotate(err, "error creating VM deployment") 676 } 677 service.Deployments = append(service.Deployments, *deployment) 678 } else { 679 // Update the deployment. 680 deployment := &service.Deployments[0] 681 if err := azure.AddRole(&gwacl.AddRoleRequest{ 682 ServiceName: service.ServiceName, 683 DeploymentName: deployment.Name, 684 PersistentVMRole: (*gwacl.PersistentVMRole)(role), 685 }); err != nil { 686 return nil, err 687 } 688 deployment.RoleList = append(deployment.RoleList, *role) 689 } 690 return env.getInstance(service, role.RoleName) 691 } 692 693 // deploymentNameV1 returns the deployment name used 694 // in the original implementation of the Azure provider. 695 func deploymentNameV1(serviceName string) string { 696 return serviceName 697 } 698 699 // deploymentNameV2 returns the deployment name used 700 // in the current implementation of the Azure provider. 701 func deploymentNameV2(serviceName string) string { 702 return serviceName + "-v2" 703 } 704 705 // MaintainInstance is specified in the InstanceBroker interface. 706 func (*azureEnviron) MaintainInstance(args environs.StartInstanceParams) error { 707 return nil 708 } 709 710 // StartInstance is specified in the InstanceBroker interface. 711 func (env *azureEnviron) StartInstance(args environs.StartInstanceParams) (*environs.StartInstanceResult, error) { 712 if args.InstanceConfig.HasNetworks() { 713 return nil, errors.New("starting instances with networks is not supported yet") 714 } 715 716 err := instancecfg.FinishInstanceConfig(args.InstanceConfig, env.Config()) 717 if err != nil { 718 return nil, err 719 } 720 721 // Pick envtools. 
Needed for the custom data (which is what we normally 722 // call userdata). 723 args.InstanceConfig.Tools = args.Tools[0] 724 logger.Infof("picked tools %q", args.InstanceConfig.Tools) 725 726 // Compose userdata. 727 userData, err := makeCustomData(args.InstanceConfig) 728 if err != nil { 729 return nil, errors.Annotate(err, "cannot compose user data") 730 } 731 732 snapshot := env.getSnapshot() 733 location := snapshot.ecfg.location() 734 instanceType, sourceImageName, err := env.selectInstanceTypeAndImage(&instances.InstanceConstraint{ 735 Region: location, 736 Series: args.Tools.OneSeries(), 737 Arches: args.Tools.Arches(), 738 Constraints: args.Constraints, 739 }) 740 if err != nil { 741 return nil, err 742 } 743 744 // We use the cloud service label as a way to group instances with 745 // the same affinity, so that machines can be be allocated to the 746 // same availability set. 747 var cloudServiceName string 748 if args.DistributionGroup != nil && snapshot.ecfg.availabilitySetsEnabled() { 749 instanceIds, err := args.DistributionGroup() 750 if err != nil { 751 return nil, err 752 } 753 for _, id := range instanceIds { 754 cloudServiceName, _ = env.splitInstanceId(id) 755 if cloudServiceName != "" { 756 break 757 } 758 } 759 } 760 761 vhd := env.newOSDisk(sourceImageName) 762 // If we're creating machine-0, we'll want to expose port 22. 763 // All other machines get an auto-generated public port for SSH. 764 stateServer := multiwatcher.AnyJobNeedsState(args.InstanceConfig.Jobs...) 
765 role := env.newRole(instanceType.Id, vhd, userData, stateServer) 766 inst, err := createInstance(env, snapshot.api, role, cloudServiceName, stateServer) 767 if err != nil { 768 return nil, err 769 } 770 hc := &instance.HardwareCharacteristics{ 771 Mem: &instanceType.Mem, 772 RootDisk: &instanceType.RootDisk, 773 CpuCores: &instanceType.CpuCores, 774 } 775 if len(instanceType.Arches) == 1 { 776 hc.Arch = &instanceType.Arches[0] 777 } 778 return &environs.StartInstanceResult{ 779 Instance: inst, 780 Hardware: hc, 781 }, nil 782 } 783 784 // getInstance returns an up-to-date version of the instance with the given 785 // name. 786 func (env *azureEnviron) getInstance(hostedService *gwacl.HostedService, roleName string) (instance.Instance, error) { 787 if n := len(hostedService.Deployments); n != 1 { 788 return nil, fmt.Errorf("expected one deployment for %q, got %d", hostedService.ServiceName, n) 789 } 790 deployment := &hostedService.Deployments[0] 791 792 var maskStateServerPorts bool 793 var instanceId instance.Id 794 switch deployment.Name { 795 case deploymentNameV1(hostedService.ServiceName): 796 // Old style instance. 797 instanceId = instance.Id(hostedService.ServiceName) 798 if n := len(deployment.RoleList); n != 1 { 799 return nil, fmt.Errorf("expected one role for %q, got %d", deployment.Name, n) 800 } 801 roleName = deployment.RoleList[0].RoleName 802 // In the old implementation of the Azure provider, 803 // all machines opened the state and API server ports. 804 maskStateServerPorts = true 805 806 case deploymentNameV2(hostedService.ServiceName): 807 instanceId = instance.Id(fmt.Sprintf("%s-%s", hostedService.ServiceName, roleName)) 808 // Newly created state server machines are put into 809 // the cloud service with the stateServerLabel label. 
810 if decoded, err := base64.StdEncoding.DecodeString(hostedService.Label); err == nil { 811 maskStateServerPorts = string(decoded) == stateServerLabel 812 } 813 } 814 815 var roleInstance *gwacl.RoleInstance 816 for _, role := range deployment.RoleInstanceList { 817 if role.RoleName == roleName { 818 roleInstance = &role 819 break 820 } 821 } 822 823 instance := &azureInstance{ 824 environ: env, 825 hostedService: &hostedService.HostedServiceDescriptor, 826 instanceId: instanceId, 827 deploymentName: deployment.Name, 828 roleName: roleName, 829 roleInstance: roleInstance, 830 maskStateServerPorts: maskStateServerPorts, 831 } 832 return instance, nil 833 } 834 835 // newOSDisk creates a gwacl.OSVirtualHardDisk object suitable for an 836 // Azure Virtual Machine. 837 func (env *azureEnviron) newOSDisk(sourceImageName string) *gwacl.OSVirtualHardDisk { 838 vhdName := gwacl.MakeRandomDiskName("juju") 839 vhdPath := fmt.Sprintf("vhds/%s", vhdName) 840 snap := env.getSnapshot() 841 storageAccount := snap.ecfg.storageAccountName() 842 mediaLink := gwacl.CreateVirtualHardDiskMediaLink(storageAccount, vhdPath) 843 // The disk label is optional and the disk name can be omitted if 844 // mediaLink is provided. 845 return gwacl.NewOSVirtualHardDisk("", "", "", mediaLink, sourceImageName, "Linux") 846 } 847 848 // getInitialEndpoints returns a slice of the endpoints every instance should have open 849 // (ssh port, etc). 850 func (env *azureEnviron) getInitialEndpoints(stateServer bool) []gwacl.InputEndpoint { 851 cfg := env.Config() 852 endpoints := []gwacl.InputEndpoint{{ 853 LocalPort: 22, 854 Name: "sshport", 855 Port: 22, 856 Protocol: "tcp", 857 }} 858 if stateServer { 859 endpoints = append(endpoints, []gwacl.InputEndpoint{{ 860 LocalPort: cfg.APIPort(), 861 Port: cfg.APIPort(), 862 Protocol: "tcp", 863 Name: "apiport", 864 }}...) 
865 } 866 for i, endpoint := range endpoints { 867 endpoint.LoadBalancedEndpointSetName = endpoint.Name 868 endpoint.LoadBalancerProbe = &gwacl.LoadBalancerProbe{ 869 Port: endpoint.Port, 870 Protocol: "TCP", 871 } 872 endpoints[i] = endpoint 873 } 874 return endpoints 875 } 876 877 // newRole creates a gwacl.Role object (an Azure Virtual Machine) which uses 878 // the given Virtual Hard Drive. 879 // 880 // The VM will have: 881 // - an 'ubuntu' user defined with an unguessable (randomly generated) password 882 // - its ssh port (TCP 22) open 883 // (if a state server) 884 // - its state port (TCP mongoDB) port open 885 // - its API port (TCP) open 886 // 887 // roleSize is the name of one of Azure's machine types, e.g. ExtraSmall, 888 // Large, A6 etc. 889 func (env *azureEnviron) newRole(roleSize string, vhd *gwacl.OSVirtualHardDisk, userData string, stateServer bool) *gwacl.Role { 890 roleName := gwacl.MakeRandomRoleName("juju") 891 // Create a Linux Configuration with the username and the password 892 // empty and disable SSH with password authentication. 893 hostname := roleName 894 username := "ubuntu" 895 password := gwacl.MakeRandomPassword() 896 linuxConfigurationSet := gwacl.NewLinuxProvisioningConfigurationSet(hostname, username, password, userData, "true") 897 // Generate a Network Configuration with the initially required ports open. 898 networkConfigurationSet := gwacl.NewNetworkConfigurationSet(env.getInitialEndpoints(stateServer), nil) 899 role := gwacl.NewLinuxRole( 900 roleSize, roleName, vhd, 901 []gwacl.ConfigurationSet{*linuxConfigurationSet, *networkConfigurationSet}, 902 ) 903 role.AvailabilitySetName = "juju" 904 return role 905 } 906 907 // StopInstances is specified in the InstanceBroker interface. 908 func (env *azureEnviron) StopInstances(ids ...instance.Id) error { 909 snap := env.getSnapshot() 910 911 // Map services to role names we want to delete. 
912 serviceInstances := make(map[string]map[string]bool) 913 var serviceNames []string 914 for _, id := range ids { 915 serviceName, roleName := env.splitInstanceId(id) 916 if roleName == "" { 917 serviceInstances[serviceName] = nil 918 serviceNames = append(serviceNames, serviceName) 919 } else { 920 deleteRoleNames, ok := serviceInstances[serviceName] 921 if !ok { 922 deleteRoleNames = make(map[string]bool) 923 serviceInstances[serviceName] = deleteRoleNames 924 serviceNames = append(serviceNames, serviceName) 925 } 926 deleteRoleNames[roleName] = true 927 } 928 } 929 930 // Load the properties of each service, so we know whether to 931 // delete the entire service. 932 // 933 // Note: concurrent operations on Affinity Groups have been 934 // found to cause conflict responses, so we do everything serially. 935 for _, serviceName := range serviceNames { 936 deleteRoleNames := serviceInstances[serviceName] 937 service, err := snap.api.GetHostedServiceProperties(serviceName, true) 938 if err != nil { 939 return err 940 } else if len(service.Deployments) != 1 { 941 continue 942 } 943 // Filter the instances that have no corresponding role. 944 roleNames := make(set.Strings) 945 for _, role := range service.Deployments[0].RoleList { 946 roleNames.Add(role.RoleName) 947 } 948 for roleName := range deleteRoleNames { 949 if !roleNames.Contains(roleName) { 950 delete(deleteRoleNames, roleName) 951 } 952 } 953 // If we're deleting all the roles, we need to delete the 954 // entire cloud service or we'll get an error. deleteRoleNames 955 // is nil if we're dealing with a legacy deployment. 
956 if deleteRoleNames == nil || len(deleteRoleNames) == roleNames.Size() { 957 if err := snap.api.DeleteHostedService(serviceName); err != nil { 958 return err 959 } 960 } else { 961 for roleName := range deleteRoleNames { 962 if err := snap.api.DeleteRole(&gwacl.DeleteRoleRequest{ 963 ServiceName: serviceName, 964 DeploymentName: service.Deployments[0].Name, 965 RoleName: roleName, 966 DeleteMedia: true, 967 }); err != nil { 968 return err 969 } 970 } 971 } 972 } 973 return nil 974 } 975 976 // hostedServices returns all services for this environment. 977 func (env *azureEnviron) hostedServices() ([]gwacl.HostedServiceDescriptor, error) { 978 snap := env.getSnapshot() 979 services, err := snap.api.ListHostedServices() 980 if err != nil { 981 return nil, err 982 } 983 984 var filteredServices []gwacl.HostedServiceDescriptor 985 // Service names are prefixed with the environment name, followed by "-". 986 // We must be careful not to include services where the environment name 987 // is a substring of another name. ie we mustn't allow "azure" to match "azure-1". 988 envPrefix := env.getEnvPrefix() 989 // Just in case. 990 filterPrefix := regexp.QuoteMeta(envPrefix) 991 992 // Now filter the services. 993 prefixMatch := regexp.MustCompile("^" + filterPrefix + "[^-]*$") 994 for _, service := range services { 995 if prefixMatch.Match([]byte(service.ServiceName)) { 996 filteredServices = append(filteredServices, service) 997 } 998 } 999 return filteredServices, nil 1000 } 1001 1002 // destroyAllServices destroys all Cloud Services and deployments contained. 1003 // This is needed to clean up broken environments, in which there are cloud 1004 // services with no deployments. 
1005 func (env *azureEnviron) destroyAllServices() error { 1006 services, err := env.hostedServices() 1007 if err != nil { 1008 return err 1009 } 1010 snap := env.getSnapshot() 1011 for _, service := range services { 1012 if err := snap.api.DeleteHostedService(service.ServiceName); err != nil { 1013 return err 1014 } 1015 } 1016 return nil 1017 } 1018 1019 // splitInstanceId splits the specified instance.Id into its 1020 // cloud-service and role parts. Both values will be empty 1021 // if the instance-id is non-matching, and role will be empty 1022 // for legacy instance-ids. 1023 func (env *azureEnviron) splitInstanceId(id instance.Id) (service, role string) { 1024 prefix := env.getEnvPrefix() 1025 if !strings.HasPrefix(string(id), prefix) { 1026 return "", "" 1027 } 1028 fields := strings.Split(string(id)[len(prefix):], "-") 1029 service = prefix + fields[0] 1030 if len(fields) > 1 { 1031 role = fields[1] 1032 } 1033 return service, role 1034 } 1035 1036 // Instances is specified in the Environ interface. 1037 func (env *azureEnviron) Instances(ids []instance.Id) ([]instance.Instance, error) { 1038 snap := env.getSnapshot() 1039 1040 type instanceId struct { 1041 serviceName, roleName string 1042 } 1043 1044 instancesIds := make([]instanceId, len(ids)) 1045 serviceNames := make(set.Strings) 1046 for i, id := range ids { 1047 serviceName, roleName := env.splitInstanceId(id) 1048 if serviceName == "" { 1049 continue 1050 } 1051 instancesIds[i] = instanceId{ 1052 serviceName: serviceName, 1053 roleName: roleName, 1054 } 1055 serviceNames.Add(serviceName) 1056 } 1057 1058 // Map service names to gwacl.HostedServices. 
1059 services, err := snap.api.ListSpecificHostedServices(&gwacl.ListSpecificHostedServicesRequest{ 1060 ServiceNames: serviceNames.Values(), 1061 }) 1062 if err != nil { 1063 return nil, err 1064 } 1065 if len(services) == 0 { 1066 return nil, environs.ErrNoInstances 1067 } 1068 hostedServices := make(map[string]*gwacl.HostedService) 1069 for _, s := range services { 1070 hostedService, err := snap.api.GetHostedServiceProperties(s.ServiceName, true) 1071 if err != nil { 1072 return nil, err 1073 } 1074 hostedServices[s.ServiceName] = hostedService 1075 } 1076 1077 var validInstances int 1078 instances := make([]instance.Instance, len(ids)) 1079 for i, id := range instancesIds { 1080 if id.serviceName == "" { 1081 // Previously determined to be an invalid instance ID. 1082 continue 1083 } 1084 hostedService := hostedServices[id.serviceName] 1085 instance, err := snap.getInstance(hostedService, id.roleName) 1086 if err == nil { 1087 instances[i] = instance 1088 validInstances++ 1089 } else { 1090 logger.Debugf("failed to get instance for role %q in service %q: %v", id.roleName, hostedService.ServiceName, err) 1091 } 1092 } 1093 1094 switch validInstances { 1095 case len(instances): 1096 err = nil 1097 case 0: 1098 instances = nil 1099 err = environs.ErrNoInstances 1100 default: 1101 err = environs.ErrPartialInstances 1102 } 1103 return instances, err 1104 } 1105 1106 // AllInstances is specified in the InstanceBroker interface. 1107 func (env *azureEnviron) AllInstances() ([]instance.Instance, error) { 1108 // The instance list is built using the list of all the Azure 1109 // Services (instance==service). 1110 // Acquire management API object. 
1111 snap := env.getSnapshot() 1112 1113 serviceDescriptors, err := env.hostedServices() 1114 if err != nil { 1115 return nil, err 1116 } 1117 1118 var instances []instance.Instance 1119 for _, sd := range serviceDescriptors { 1120 hostedService, err := snap.api.GetHostedServiceProperties(sd.ServiceName, true) 1121 if err != nil { 1122 return nil, err 1123 } else if len(hostedService.Deployments) != 1 { 1124 continue 1125 } 1126 deployment := &hostedService.Deployments[0] 1127 for _, role := range deployment.RoleList { 1128 instance, err := snap.getInstance(hostedService, role.RoleName) 1129 if err != nil { 1130 return nil, err 1131 } 1132 instances = append(instances, instance) 1133 } 1134 } 1135 return instances, nil 1136 } 1137 1138 // getEnvPrefix returns the prefix used to name the objects specific to this 1139 // environment. The environment prefix name is immutable, so there is no need 1140 // to use a configuration snapshot. 1141 func (env *azureEnviron) getEnvPrefix() string { 1142 return fmt.Sprintf("juju-%s-", env.Config().Name()) 1143 } 1144 1145 // Storage is specified in the Environ interface. 1146 func (env *azureEnviron) Storage() storage.Storage { 1147 return env.getSnapshot().storage 1148 } 1149 1150 // Destroy is specified in the Environ interface. 1151 func (env *azureEnviron) Destroy() error { 1152 logger.Debugf("destroying environment %q", env.Config().Name()) 1153 1154 // Stop all instances. 1155 if err := env.destroyAllServices(); err != nil { 1156 return fmt.Errorf("cannot destroy instances: %v", err) 1157 } 1158 1159 // Delete vnet and affinity group. Deleting the virtual network 1160 // may fail for inexplicable reasons (cannot delete in the Azure 1161 // console either for some amount of time after deleting dependent 1162 // VMs), so we only treat this as a warning. There is no cost 1163 // associated with a vnet or affinity group. 
1164 if err := env.deleteVirtualNetwork(); err != nil { 1165 logger.Warningf("cannot delete the environment's virtual network: %v", err) 1166 } 1167 if err := env.deleteAffinityGroup(); err != nil { 1168 logger.Warningf("cannot delete the environment's affinity group: %v", err) 1169 } 1170 1171 // Delete storage. 1172 // Deleting the storage is done last so that if something fails 1173 // half way through the Destroy() method, the storage won't be cleaned 1174 // up and thus an attempt to re-boostrap the environment will lead to 1175 // a "error: environment is already bootstrapped" error. 1176 if err := env.Storage().RemoveAll(); err != nil { 1177 return fmt.Errorf("cannot clean up storage: %v", err) 1178 } 1179 return nil 1180 } 1181 1182 // OpenPorts is specified in the Environ interface. However, Azure does not 1183 // support the global firewall mode. 1184 func (env *azureEnviron) OpenPorts(ports []network.PortRange) error { 1185 return nil 1186 } 1187 1188 // ClosePorts is specified in the Environ interface. However, Azure does not 1189 // support the global firewall mode. 1190 func (env *azureEnviron) ClosePorts(ports []network.PortRange) error { 1191 return nil 1192 } 1193 1194 // Ports is specified in the Environ interface. 1195 func (env *azureEnviron) Ports() ([]network.PortRange, error) { 1196 // TODO: implement this. 1197 return []network.PortRange{}, nil 1198 } 1199 1200 // Provider is specified in the Environ interface. 1201 func (env *azureEnviron) Provider() environs.EnvironProvider { 1202 return azureEnvironProvider{} 1203 } 1204 1205 var ( 1206 retryPolicy = gwacl.RetryPolicy{ 1207 NbRetries: 6, 1208 HttpStatusCodes: []int{ 1209 http.StatusConflict, 1210 http.StatusRequestTimeout, 1211 http.StatusInternalServerError, 1212 http.StatusServiceUnavailable, 1213 }, 1214 Delay: 10 * time.Second} 1215 ) 1216 1217 // updateStorageAccountKey queries the storage account key, and updates the 1218 // version cached in env.storageAccountKey. 
1219 // 1220 // It takes a snapshot in order to preserve transactional integrity relative 1221 // to the snapshot's starting state, without having to lock the environment 1222 // for the duration. If there is a conflicting change to env relative to the 1223 // state recorded in the snapshot, this function will fail. 1224 func (env *azureEnviron) updateStorageAccountKey(snapshot *azureEnviron) (string, error) { 1225 // This method follows an RCU pattern, an optimistic technique to 1226 // implement atomic read-update transactions: get a consistent snapshot 1227 // of state; process data; enter critical section; check for conflicts; 1228 // write back changes. The advantage is that there are no long-held 1229 // locks, in particular while waiting for the request to Azure to 1230 // complete. 1231 // "Get a consistent snapshot of state" is the caller's responsibility. 1232 // The caller can use env.getSnapshot(). 1233 1234 // Process data: get a current account key from Azure. 1235 key, err := env.queryStorageAccountKey() 1236 if err != nil { 1237 return "", err 1238 } 1239 1240 // Enter critical section. 1241 env.Lock() 1242 defer env.Unlock() 1243 1244 // Check for conflicts: is the config still what it was? 1245 if env.ecfg != snapshot.ecfg { 1246 // The environment has been reconfigured while we were 1247 // working on this, so the key we just get may not be 1248 // appropriate any longer. So fail. 1249 // Whatever we were doing isn't likely to be right any more 1250 // anyway. Otherwise, it might be worth returning the key 1251 // just in case it still works, and proceed without updating 1252 // env.storageAccountKey. 1253 return "", fmt.Errorf("environment was reconfigured") 1254 } 1255 1256 // Write back changes. 1257 env.storageAccountKey = key 1258 return key, nil 1259 } 1260 1261 // getStorageContext obtains a context object for interfacing with Azure's 1262 // storage API. 1263 // For now, each invocation just returns a separate object. 
This is probably 1264 // wasteful (each context gets its own SSL connection) and may need optimizing 1265 // later. 1266 func (env *azureEnviron) getStorageContext() (*gwacl.StorageContext, error) { 1267 snap := env.getSnapshot() 1268 key := snap.storageAccountKey 1269 if key == "" { 1270 // We don't know the storage-account key yet. Request it. 1271 var err error 1272 key, err = env.updateStorageAccountKey(snap) 1273 if err != nil { 1274 return nil, err 1275 } 1276 } 1277 context := gwacl.StorageContext{ 1278 Account: snap.ecfg.storageAccountName(), 1279 Key: key, 1280 AzureEndpoint: gwacl.GetEndpoint(snap.ecfg.location()), 1281 RetryPolicy: retryPolicy, 1282 } 1283 return &context, nil 1284 } 1285 1286 // TODO(ericsnow) lp-1398055 1287 // Implement the ZonedEnviron interface. 1288 1289 // Region is specified in the HasRegion interface. 1290 func (env *azureEnviron) Region() (simplestreams.CloudSpec, error) { 1291 ecfg := env.getSnapshot().ecfg 1292 return simplestreams.CloudSpec{ 1293 Region: ecfg.location(), 1294 Endpoint: string(gwacl.GetEndpoint(ecfg.location())), 1295 }, nil 1296 } 1297 1298 // SupportsUnitPlacement is specified in the state.EnvironCapability interface. 1299 func (env *azureEnviron) SupportsUnitPlacement() error { 1300 if env.getSnapshot().ecfg.availabilitySetsEnabled() { 1301 return fmt.Errorf("unit placement is not supported with availability-sets-enabled") 1302 } 1303 return nil 1304 }