github.com/altoros/juju-vmware@v0.0.0-20150312064031-f19ae857ccca/provider/azure/environ.go (about) 1 // Copyright 2013 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package azure 5 6 import ( 7 "encoding/base64" 8 "fmt" 9 "net/http" 10 "regexp" 11 "strings" 12 "sync" 13 "time" 14 15 "github.com/juju/errors" 16 "github.com/juju/utils" 17 "github.com/juju/utils/set" 18 "launchpad.net/gwacl" 19 20 "github.com/juju/juju/constraints" 21 "github.com/juju/juju/environs" 22 "github.com/juju/juju/environs/config" 23 "github.com/juju/juju/environs/imagemetadata" 24 "github.com/juju/juju/environs/instances" 25 "github.com/juju/juju/environs/simplestreams" 26 "github.com/juju/juju/environs/storage" 27 "github.com/juju/juju/instance" 28 "github.com/juju/juju/network" 29 "github.com/juju/juju/provider/common" 30 "github.com/juju/juju/state" 31 "github.com/juju/juju/state/multiwatcher" 32 ) 33 34 const ( 35 // deploymentSlot says in which slot to deploy instances. Azure 36 // supports 'Production' or 'Staging'. 37 // This provider always deploys to Production. Think twice about 38 // changing that: DNS names in the staging slot work differently from 39 // those in the production slot. In Staging, Azure assigns an 40 // arbitrary hostname that we can then extract from the deployment's 41 // URL. In Production, the hostname in the deployment URL does not 42 // actually seem to resolve; instead, the service name is used as the 43 // DNS name, with ".cloudapp.net" appended. 44 deploymentSlot = "Production" 45 46 // Address space of the virtual network used by the nodes in this 47 // environement, in CIDR notation. This is the network used for 48 // machine-to-machine communication. 49 networkDefinition = "10.0.0.0/8" 50 51 // stateServerLabel is the label applied to the cloud service created 52 // for state servers. 53 stateServerLabel = "juju-state-server" 54 ) 55 56 // vars for testing purposes. 57 var ( 58 createInstance = (*azureEnviron).createInstance 59 ) 60 61 type azureEnviron struct { 62 // Except where indicated otherwise, all fields in this object should 63 // only be accessed using a lock or a snapshot. 64 sync.Mutex 65 66 // archMutex gates access to supportedArchitectures 67 archMutex sync.Mutex 68 // supportedArchitectures caches the architectures 69 // for which images can be instantiated. 70 supportedArchitectures []string 71 72 // ecfg is the environment's Azure-specific configuration. 73 ecfg *azureEnvironConfig 74 75 // storage is this environ's own private storage. 76 storage storage.Storage 77 78 // storageAccountKey holds an access key to this environment's 79 // private storage. This is automatically queried from Azure on 80 // startup. 81 storageAccountKey string 82 83 // api is a management API for Microsoft Azure. 84 api *gwacl.ManagementAPI 85 86 // vnet describes the configured virtual network. 87 vnet *gwacl.VirtualNetworkSite 88 89 // availableRoleSizes is the role sizes available in the configured 90 // location. This will be reset whenever the location configuration changes. 91 availableRoleSizes set.Strings 92 } 93 94 // azureEnviron implements Environ and HasRegion. 95 var _ environs.Environ = (*azureEnviron)(nil) 96 var _ simplestreams.HasRegion = (*azureEnviron)(nil) 97 var _ state.Prechecker = (*azureEnviron)(nil) 98 99 // NewEnviron creates a new azureEnviron. 100 func NewEnviron(cfg *config.Config) (*azureEnviron, error) { 101 var env azureEnviron 102 err := env.SetConfig(cfg) 103 if err != nil { 104 return nil, err 105 } 106 107 // Set up storage. 108 env.storage = &azureStorage{ 109 storageContext: &environStorageContext{environ: &env}, 110 } 111 return &env, nil 112 } 113 114 // extractStorageKey returns the primary account key from a gwacl 115 // StorageAccountKeys struct, or if there is none, the secondary one. 116 func extractStorageKey(keys *gwacl.StorageAccountKeys) string { 117 if keys.Primary != "" { 118 return keys.Primary 119 } 120 return keys.Secondary 121 } 122 123 // queryStorageAccountKey retrieves the storage account's key from Azure. 124 func (env *azureEnviron) queryStorageAccountKey() (string, error) { 125 snap := env.getSnapshot() 126 127 accountName := snap.ecfg.storageAccountName() 128 keys, err := snap.api.GetStorageAccountKeys(accountName) 129 if err != nil { 130 return "", errors.Annotate(err, "cannot obtain storage account keys") 131 } 132 133 key := extractStorageKey(keys) 134 if key == "" { 135 return "", errors.New("no keys available for storage account") 136 } 137 138 return key, nil 139 } 140 141 // getSnapshot produces an atomic shallow copy of the environment object. 142 // Whenever you need to access the environment object's fields without 143 // modifying them, get a snapshot and read its fields instead. You will 144 // get a consistent view of the fields without any further locking. 145 // If you do need to modify the environment's fields, do not get a snapshot 146 // but lock the object throughout the critical section. 147 func (env *azureEnviron) getSnapshot() *azureEnviron { 148 env.Lock() 149 defer env.Unlock() 150 151 // Copy the environment. (Not the pointer, the environment itself.) 152 // This is a shallow copy. 153 snap := *env 154 // Reset the snapshot's mutex, because we just copied it while we 155 // were holding it. The snapshot will have a "clean," unlocked mutex. 156 snap.Mutex = sync.Mutex{} 157 return &snap 158 } 159 160 // getAffinityGroupName returns the name of the affinity group used by all 161 // the Services in this environment. The affinity group name is immutable, 162 // so there is no need to use a configuration snapshot. 163 func (env *azureEnviron) getAffinityGroupName() string { 164 return env.getEnvPrefix() + "ag" 165 } 166 167 // getLocation gets the configured Location for the environment. 168 func (env *azureEnviron) getLocation() string { 169 snap := env.getSnapshot() 170 return snap.ecfg.location() 171 } 172 173 func (env *azureEnviron) createAffinityGroup() error { 174 snap := env.getSnapshot() 175 affinityGroupName := env.getAffinityGroupName() 176 location := snap.ecfg.location() 177 cag := gwacl.NewCreateAffinityGroup(affinityGroupName, affinityGroupName, affinityGroupName, location) 178 return snap.api.CreateAffinityGroup(&gwacl.CreateAffinityGroupRequest{ 179 CreateAffinityGroup: cag, 180 }) 181 } 182 183 func (env *azureEnviron) deleteAffinityGroup() error { 184 snap := env.getSnapshot() 185 affinityGroupName := env.getAffinityGroupName() 186 return snap.api.DeleteAffinityGroup(&gwacl.DeleteAffinityGroupRequest{ 187 Name: affinityGroupName, 188 }) 189 } 190 191 // getAvailableRoleSizes returns the role sizes available for the configured 192 // location. 193 func (env *azureEnviron) getAvailableRoleSizes() (_ set.Strings, err error) { 194 defer errors.DeferredAnnotatef(&err, "cannot get available role sizes") 195 196 snap := env.getSnapshot() 197 if snap.availableRoleSizes != nil { 198 return snap.availableRoleSizes, nil 199 } 200 locations, err := snap.api.ListLocations() 201 if err != nil { 202 return nil, errors.Annotate(err, "cannot list locations") 203 } 204 var available set.Strings 205 for _, location := range locations { 206 if location.Name != snap.ecfg.location() { 207 continue 208 } 209 if location.ComputeCapabilities == nil { 210 return nil, errors.Annotate(err, "cannot determine compute capabilities") 211 } 212 available = set.NewStrings(location.ComputeCapabilities.VirtualMachineRoleSizes...) 213 break 214 } 215 if available == nil { 216 return nil, errors.NotFoundf("location %q", snap.ecfg.location()) 217 } 218 env.Lock() 219 env.availableRoleSizes = available 220 env.Unlock() 221 return available, nil 222 } 223 224 // getVirtualNetworkName returns the name of the virtual network used by all 225 // the VMs in this environment. The virtual network name is immutable, 226 // so there is no need to use a configuration snapshot. 227 func (env *azureEnviron) getVirtualNetworkName() string { 228 return env.getEnvPrefix() + "vnet" 229 } 230 231 // getVirtualNetwork returns the virtual network used by all the VMs in this 232 // environment. 233 func (env *azureEnviron) getVirtualNetwork() (*gwacl.VirtualNetworkSite, error) { 234 snap := env.getSnapshot() 235 if snap.vnet != nil { 236 return snap.vnet, nil 237 } 238 cfg, err := env.api.GetNetworkConfiguration() 239 if err != nil { 240 return nil, errors.Annotate(err, "error getting network configuration") 241 } 242 var vnet *gwacl.VirtualNetworkSite 243 vnetName := env.getVirtualNetworkName() 244 if cfg != nil && cfg.VirtualNetworkSites != nil { 245 for _, site := range *cfg.VirtualNetworkSites { 246 if site.Name == vnetName { 247 vnet = &site 248 break 249 } 250 } 251 } 252 if vnet == nil { 253 return nil, errors.NotFoundf("virtual network %q", vnetName) 254 } 255 env.Lock() 256 env.vnet = vnet 257 env.Unlock() 258 return vnet, nil 259 } 260 261 // createVirtualNetwork creates a virtual network for the environment. 262 func (env *azureEnviron) createVirtualNetwork() error { 263 snap := env.getSnapshot() 264 vnetName := env.getVirtualNetworkName() 265 virtualNetwork := gwacl.VirtualNetworkSite{ 266 Name: vnetName, 267 Location: snap.ecfg.location(), 268 AddressSpacePrefixes: []string{ 269 networkDefinition, 270 }, 271 } 272 if err := snap.api.AddVirtualNetworkSite(&virtualNetwork); err != nil { 273 return errors.Trace(err) 274 } 275 env.Lock() 276 env.vnet = &virtualNetwork 277 env.Unlock() 278 return nil 279 } 280 281 // deleteVnetAttempt is an AttemptyStrategy for use 282 // when attempting delete a virtual network. This is 283 // necessary as Azure apparently does not release all 284 // references to the vnet even when all cloud services 285 // are deleted. 286 var deleteVnetAttempt = utils.AttemptStrategy{ 287 Total: 30 * time.Second, 288 Delay: 1 * time.Second, 289 } 290 291 var networkInUse = regexp.MustCompile(".*The virtual network .* is currently in use.*") 292 293 func (env *azureEnviron) deleteVirtualNetwork() error { 294 snap := env.getSnapshot() 295 vnetName := env.getVirtualNetworkName() 296 var err error 297 for a := deleteVnetAttempt.Start(); a.Next(); { 298 err = snap.api.RemoveVirtualNetworkSite(vnetName) 299 if err == nil { 300 return nil 301 } 302 if err, ok := err.(*gwacl.AzureError); ok { 303 if err.StatusCode() == 400 && networkInUse.MatchString(err.Message) { 304 // Retry on "virtual network XYZ is currently in use". 305 continue 306 } 307 } 308 // Any other error should be returned. 309 break 310 } 311 return err 312 } 313 314 // getContainerName returns the name of the private storage account container 315 // that this environment is using. The container name is immutable, 316 // so there is no need to use a configuration snapshot. 317 func (env *azureEnviron) getContainerName() string { 318 return env.getEnvPrefix() + "private" 319 } 320 321 func isHTTPConflict(err error) bool { 322 if err, ok := err.(gwacl.HTTPError); ok { 323 return err.StatusCode() == http.StatusConflict 324 } 325 return false 326 } 327 328 func isVirtualNetworkExist(err error) bool { 329 // TODO(axw) 2014-06-16 #1330473 330 // Add an error type to gwacl for this. 331 s := err.Error() 332 const prefix = "could not add virtual network" 333 const suffix = "already exists" 334 return strings.HasPrefix(s, prefix) && strings.HasSuffix(s, suffix) 335 } 336 337 // Bootstrap is specified in the Environ interface. 338 func (env *azureEnviron) Bootstrap(ctx environs.BootstrapContext, args environs.BootstrapParams) (arch, series string, _ environs.BootstrapFinalizer, err error) { 339 // The creation of the affinity group and the virtual network is specific to the Azure provider. 340 err = env.createAffinityGroup() 341 if err != nil && !isHTTPConflict(err) { 342 return "", "", nil, err 343 } 344 // If we fail after this point, clean up the affinity group. 345 defer func() { 346 if err != nil { 347 env.deleteAffinityGroup() 348 } 349 }() 350 351 err = env.createVirtualNetwork() 352 if err != nil && !isVirtualNetworkExist(err) { 353 return "", "", nil, err 354 } 355 // If we fail after this point, clean up the virtual network. 356 defer func() { 357 if err != nil { 358 env.deleteVirtualNetwork() 359 } 360 }() 361 return common.Bootstrap(ctx, env, args) 362 } 363 364 // isLegacyInstance reports whether the instance is a 365 // legacy instance (i.e. one-to-one cloud service to instance). 366 func isLegacyInstance(inst *azureInstance) (bool, error) { 367 snap := inst.environ.getSnapshot() 368 serviceName := inst.hostedService.ServiceName 369 service, err := snap.api.GetHostedServiceProperties(serviceName, true) 370 if err != nil { 371 return false, err 372 } else if len(service.Deployments) != 1 { 373 return false, nil 374 } 375 deploymentName := service.Deployments[0].Name 376 return deploymentName == deploymentNameV1(serviceName), nil 377 } 378 379 // StateServerInstances is specified in the Environ interface. 380 func (env *azureEnviron) StateServerInstances() ([]instance.Id, error) { 381 // Locate the state-server cloud service, and get its addresses. 382 instances, err := env.AllInstances() 383 if err != nil { 384 return nil, err 385 } 386 var stateServerInstanceIds []instance.Id 387 var loadStateFile bool 388 for _, inst := range instances { 389 azureInstance := inst.(*azureInstance) 390 label := azureInstance.hostedService.Label 391 if decoded, err := base64.StdEncoding.DecodeString(label); err == nil { 392 if string(decoded) == stateServerLabel { 393 stateServerInstanceIds = append(stateServerInstanceIds, inst.Id()) 394 continue 395 } 396 } 397 if !loadStateFile { 398 _, roleName := env.splitInstanceId(azureInstance.Id()) 399 if roleName == "" { 400 loadStateFile = true 401 } 402 } 403 } 404 if loadStateFile { 405 // Some legacy instances were found, so we must load provider-state 406 // to find which instance was the original state server. If we find 407 // a legacy environment, then stateServerInstanceIds will not contain 408 // the original bootstrap instance, which is the only one that will 409 // be in provider-state. 410 instanceIds, err := common.ProviderStateInstances(env, env.Storage()) 411 if err != nil { 412 return nil, err 413 } 414 stateServerInstanceIds = append(stateServerInstanceIds, instanceIds...) 415 } 416 if len(stateServerInstanceIds) == 0 { 417 return nil, environs.ErrNoInstances 418 } 419 return stateServerInstanceIds, nil 420 } 421 422 // Config is specified in the Environ interface. 423 func (env *azureEnviron) Config() *config.Config { 424 snap := env.getSnapshot() 425 return snap.ecfg.Config 426 } 427 428 // SetConfig is specified in the Environ interface. 429 func (env *azureEnviron) SetConfig(cfg *config.Config) error { 430 var oldLocation string 431 if snap := env.getSnapshot(); snap.ecfg != nil { 432 oldLocation = snap.ecfg.location() 433 } 434 435 ecfg, err := azureEnvironProvider{}.newConfig(cfg) 436 if err != nil { 437 return err 438 } 439 440 env.Lock() 441 defer env.Unlock() 442 443 if env.ecfg != nil { 444 _, err = azureEnvironProvider{}.Validate(cfg, env.ecfg.Config) 445 if err != nil { 446 return err 447 } 448 } 449 450 env.ecfg = ecfg 451 452 // Reset storage account key. Even if we had one before, it may not 453 // be appropriate for the new config. 454 env.storageAccountKey = "" 455 456 subscription := ecfg.managementSubscriptionId() 457 certKeyPEM := []byte(ecfg.managementCertificate()) 458 location := ecfg.location() 459 mgtAPI, err := gwacl.NewManagementAPICertDataWithRetryPolicy(subscription, certKeyPEM, certKeyPEM, location, retryPolicy) 460 if err != nil { 461 return errors.Annotate(err, "cannot acquire management API") 462 } 463 env.api = mgtAPI 464 465 // If the location changed, reset the available role sizes. 466 if location != oldLocation { 467 env.availableRoleSizes = nil 468 } 469 470 return nil 471 } 472 473 // attemptCreateService tries to create a new hosted service on Azure, with a 474 // name it chooses (based on the given prefix), but recognizes that the name 475 // may not be available. If the name is not available, it does not treat that 476 // as an error but just returns nil. 477 func attemptCreateService(azure *gwacl.ManagementAPI, prefix, affinityGroupName, label string) (*gwacl.CreateHostedService, error) { 478 var err error 479 name := gwacl.MakeRandomHostedServiceName(prefix) 480 err = azure.CheckHostedServiceNameAvailability(name) 481 if err != nil { 482 // The calling function should retry. 483 return nil, nil 484 } 485 if label == "" { 486 label = name 487 } 488 req := gwacl.NewCreateHostedServiceWithLocation(name, label, "") 489 req.AffinityGroup = affinityGroupName 490 err = azure.AddHostedService(req) 491 if err != nil { 492 return nil, err 493 } 494 return req, nil 495 } 496 497 // newHostedService creates a hosted service. It will make up a unique name, 498 // starting with the given prefix. 499 func newHostedService(azure *gwacl.ManagementAPI, prefix, affinityGroupName, label string) (*gwacl.HostedService, error) { 500 var err error 501 var createdService *gwacl.CreateHostedService 502 for tries := 10; tries > 0 && err == nil && createdService == nil; tries-- { 503 createdService, err = attemptCreateService(azure, prefix, affinityGroupName, label) 504 } 505 if err != nil { 506 return nil, errors.Annotate(err, "could not create hosted service") 507 } 508 if createdService == nil { 509 return nil, fmt.Errorf("could not come up with a unique hosted service name - is your randomizer initialized?") 510 } 511 return azure.GetHostedServiceProperties(createdService.ServiceName, true) 512 } 513 514 // SupportedArchitectures is specified on the EnvironCapability interface. 515 func (env *azureEnviron) SupportedArchitectures() ([]string, error) { 516 env.archMutex.Lock() 517 defer env.archMutex.Unlock() 518 if env.supportedArchitectures != nil { 519 return env.supportedArchitectures, nil 520 } 521 // Create a filter to get all images from our region and for the correct stream. 522 ecfg := env.getSnapshot().ecfg 523 region := ecfg.location() 524 cloudSpec := simplestreams.CloudSpec{ 525 Region: region, 526 Endpoint: getEndpoint(region), 527 } 528 imageConstraint := imagemetadata.NewImageConstraint(simplestreams.LookupParams{ 529 CloudSpec: cloudSpec, 530 Stream: ecfg.ImageStream(), 531 }) 532 var err error 533 env.supportedArchitectures, err = common.SupportedArchitectures(env, imageConstraint) 534 return env.supportedArchitectures, err 535 } 536 537 // selectInstanceTypeAndImage returns the appropriate instances.InstanceType and 538 // the OS image name for launching a virtual machine with the given parameters. 539 func (env *azureEnviron) selectInstanceTypeAndImage(constraint *instances.InstanceConstraint) (*instances.InstanceType, string, error) { 540 ecfg := env.getSnapshot().ecfg 541 sourceImageName := ecfg.forceImageName() 542 if sourceImageName != "" { 543 // Configuration forces us to use a specific image. There may 544 // not be a suitable image in the simplestreams database. 545 // This means we can't use Juju's normal selection mechanism, 546 // because it combines instance-type and image selection: if 547 // there are no images we can use, it won't offer us an 548 // instance type either. 549 // 550 // Select the instance type using simple, Azure-specific code. 551 instanceType, err := selectMachineType(env, defaultToBaselineSpec(constraint.Constraints)) 552 if err != nil { 553 return nil, "", err 554 } 555 return instanceType, sourceImageName, nil 556 } 557 558 // Choose the most suitable instance type and OS image, based on simplestreams information. 559 spec, err := findInstanceSpec(env, constraint) 560 if err != nil { 561 return nil, "", err 562 } 563 return &spec.InstanceType, spec.Image.Id, nil 564 } 565 566 var unsupportedConstraints = []string{ 567 constraints.CpuPower, 568 constraints.Tags, 569 } 570 571 // ConstraintsValidator is defined on the Environs interface. 572 func (env *azureEnviron) ConstraintsValidator() (constraints.Validator, error) { 573 validator := constraints.NewValidator() 574 validator.RegisterUnsupported(unsupportedConstraints) 575 supportedArches, err := env.SupportedArchitectures() 576 if err != nil { 577 return nil, err 578 } 579 validator.RegisterVocabulary(constraints.Arch, supportedArches) 580 581 instanceTypes, err := listInstanceTypes(env) 582 if err != nil { 583 return nil, err 584 } 585 instTypeNames := make([]string, len(instanceTypes)) 586 for i, instanceType := range instanceTypes { 587 instTypeNames[i] = instanceType.Name 588 } 589 validator.RegisterVocabulary(constraints.InstanceType, instTypeNames) 590 validator.RegisterConflicts( 591 []string{constraints.InstanceType}, 592 []string{constraints.Mem, constraints.CpuCores, constraints.Arch, constraints.RootDisk}) 593 594 return validator, nil 595 } 596 597 // PrecheckInstance is defined on the state.Prechecker interface. 598 func (env *azureEnviron) PrecheckInstance(series string, cons constraints.Value, placement string) error { 599 if placement != "" { 600 return fmt.Errorf("unknown placement directive: %s", placement) 601 } 602 if !cons.HasInstanceType() { 603 return nil 604 } 605 // Constraint has an instance-type constraint so let's see if it is valid. 606 instanceTypes, err := listInstanceTypes(env) 607 if err != nil { 608 return err 609 } 610 for _, instanceType := range instanceTypes { 611 if instanceType.Name == *cons.InstanceType { 612 return nil 613 } 614 } 615 return fmt.Errorf("invalid instance type %q", *cons.InstanceType) 616 } 617 618 // createInstance creates all of the Azure entities necessary for a 619 // new instance. This includes Cloud Service, Deployment and Role. 620 // 621 // If serviceName is non-empty, then createInstance will assign to 622 // the Cloud Service with that name. Otherwise, a new Cloud Service 623 // will be created. 624 func (env *azureEnviron) createInstance(azure *gwacl.ManagementAPI, role *gwacl.Role, serviceName string, stateServer bool) (resultInst instance.Instance, resultErr error) { 625 var inst instance.Instance 626 defer func() { 627 if inst != nil && resultErr != nil { 628 if err := env.StopInstances(inst.Id()); err != nil { 629 // Failure upon failure. Log it, but return the original error. 630 logger.Errorf("error releasing failed instance: %v", err) 631 } 632 } 633 }() 634 var err error 635 var service *gwacl.HostedService 636 if serviceName != "" { 637 logger.Debugf("creating instance in existing cloud service %q", serviceName) 638 service, err = azure.GetHostedServiceProperties(serviceName, true) 639 } else { 640 logger.Debugf("creating instance in new cloud service") 641 // If we're creating a cloud service for state servers, 642 // we will want to open additional ports. We need to 643 // record this against the cloud service, so we use a 644 // special label for the purpose. 645 var label string 646 if stateServer { 647 label = stateServerLabel 648 } 649 service, err = newHostedService(azure, env.getEnvPrefix(), env.getAffinityGroupName(), label) 650 } 651 if err != nil { 652 return nil, err 653 } 654 if len(service.Deployments) == 0 { 655 // This is a newly created cloud service, so we 656 // should destroy it if anything below fails. 657 defer func() { 658 if resultErr != nil { 659 azure.DeleteHostedService(service.ServiceName) 660 // Destroying the hosted service destroys the instance, 661 // so ensure StopInstances isn't called. 662 inst = nil 663 } 664 }() 665 // Create an initial deployment. 666 deployment := gwacl.NewDeploymentForCreateVMDeployment( 667 deploymentNameV2(service.ServiceName), 668 deploymentSlot, 669 deploymentNameV2(service.ServiceName), 670 []gwacl.Role{*role}, 671 env.getVirtualNetworkName(), 672 ) 673 if err := azure.AddDeployment(deployment, service.ServiceName); err != nil { 674 return nil, errors.Annotate(err, "error creating VM deployment") 675 } 676 service.Deployments = append(service.Deployments, *deployment) 677 } else { 678 // Update the deployment. 679 deployment := &service.Deployments[0] 680 if err := azure.AddRole(&gwacl.AddRoleRequest{ 681 ServiceName: service.ServiceName, 682 DeploymentName: deployment.Name, 683 PersistentVMRole: (*gwacl.PersistentVMRole)(role), 684 }); err != nil { 685 return nil, err 686 } 687 deployment.RoleList = append(deployment.RoleList, *role) 688 } 689 return env.getInstance(service, role.RoleName) 690 } 691 692 // deploymentNameV1 returns the deployment name used 693 // in the original implementation of the Azure provider. 694 func deploymentNameV1(serviceName string) string { 695 return serviceName 696 } 697 698 // deploymentNameV2 returns the deployment name used 699 // in the current implementation of the Azure provider. 700 func deploymentNameV2(serviceName string) string { 701 return serviceName + "-v2" 702 } 703 704 // StartInstance is specified in the InstanceBroker interface. 705 func (env *azureEnviron) StartInstance(args environs.StartInstanceParams) (*environs.StartInstanceResult, error) { 706 if args.MachineConfig.HasNetworks() { 707 return nil, errors.New("starting instances with networks is not supported yet") 708 } 709 710 err := environs.FinishMachineConfig(args.MachineConfig, env.Config()) 711 if err != nil { 712 return nil, err 713 } 714 715 // Pick envtools. Needed for the custom data (which is what we normally 716 // call userdata). 717 args.MachineConfig.Tools = args.Tools[0] 718 logger.Infof("picked tools %q", args.MachineConfig.Tools) 719 720 // Compose userdata. 721 userData, err := makeCustomData(args.MachineConfig) 722 if err != nil { 723 return nil, errors.Annotate(err, "cannot compose user data") 724 } 725 726 snapshot := env.getSnapshot() 727 location := snapshot.ecfg.location() 728 instanceType, sourceImageName, err := env.selectInstanceTypeAndImage(&instances.InstanceConstraint{ 729 Region: location, 730 Series: args.Tools.OneSeries(), 731 Arches: args.Tools.Arches(), 732 Constraints: args.Constraints, 733 }) 734 if err != nil { 735 return nil, err 736 } 737 738 // We use the cloud service label as a way to group instances with 739 // the same affinity, so that machines can be be allocated to the 740 // same availability set. 741 var cloudServiceName string 742 if args.DistributionGroup != nil && snapshot.ecfg.availabilitySetsEnabled() { 743 instanceIds, err := args.DistributionGroup() 744 if err != nil { 745 return nil, err 746 } 747 for _, id := range instanceIds { 748 cloudServiceName, _ = env.splitInstanceId(id) 749 if cloudServiceName != "" { 750 break 751 } 752 } 753 } 754 755 vhd := env.newOSDisk(sourceImageName) 756 // If we're creating machine-0, we'll want to expose port 22. 757 // All other machines get an auto-generated public port for SSH. 758 stateServer := multiwatcher.AnyJobNeedsState(args.MachineConfig.Jobs...) 759 role := env.newRole(instanceType.Id, vhd, userData, stateServer) 760 inst, err := createInstance(env, snapshot.api, role, cloudServiceName, stateServer) 761 if err != nil { 762 return nil, err 763 } 764 hc := &instance.HardwareCharacteristics{ 765 Mem: &instanceType.Mem, 766 RootDisk: &instanceType.RootDisk, 767 CpuCores: &instanceType.CpuCores, 768 } 769 if len(instanceType.Arches) == 1 { 770 hc.Arch = &instanceType.Arches[0] 771 } 772 return &environs.StartInstanceResult{ 773 Instance: inst, 774 Hardware: hc, 775 }, nil 776 } 777 778 // getInstance returns an up-to-date version of the instance with the given 779 // name. 780 func (env *azureEnviron) getInstance(hostedService *gwacl.HostedService, roleName string) (instance.Instance, error) { 781 if n := len(hostedService.Deployments); n != 1 { 782 return nil, fmt.Errorf("expected one deployment for %q, got %d", hostedService.ServiceName, n) 783 } 784 deployment := &hostedService.Deployments[0] 785 786 var maskStateServerPorts bool 787 var instanceId instance.Id 788 switch deployment.Name { 789 case deploymentNameV1(hostedService.ServiceName): 790 // Old style instance. 791 instanceId = instance.Id(hostedService.ServiceName) 792 if n := len(deployment.RoleList); n != 1 { 793 return nil, fmt.Errorf("expected one role for %q, got %d", deployment.Name, n) 794 } 795 roleName = deployment.RoleList[0].RoleName 796 // In the old implementation of the Azure provider, 797 // all machines opened the state and API server ports. 798 maskStateServerPorts = true 799 800 case deploymentNameV2(hostedService.ServiceName): 801 instanceId = instance.Id(fmt.Sprintf("%s-%s", hostedService.ServiceName, roleName)) 802 // Newly created state server machines are put into 803 // the cloud service with the stateServerLabel label. 804 if decoded, err := base64.StdEncoding.DecodeString(hostedService.Label); err == nil { 805 maskStateServerPorts = string(decoded) == stateServerLabel 806 } 807 } 808 809 var roleInstance *gwacl.RoleInstance 810 for _, role := range deployment.RoleInstanceList { 811 if role.RoleName == roleName { 812 roleInstance = &role 813 break 814 } 815 } 816 817 instance := &azureInstance{ 818 environ: env, 819 hostedService: &hostedService.HostedServiceDescriptor, 820 instanceId: instanceId, 821 deploymentName: deployment.Name, 822 roleName: roleName, 823 roleInstance: roleInstance, 824 maskStateServerPorts: maskStateServerPorts, 825 } 826 return instance, nil 827 } 828 829 // newOSDisk creates a gwacl.OSVirtualHardDisk object suitable for an 830 // Azure Virtual Machine. 831 func (env *azureEnviron) newOSDisk(sourceImageName string) *gwacl.OSVirtualHardDisk { 832 vhdName := gwacl.MakeRandomDiskName("juju") 833 vhdPath := fmt.Sprintf("vhds/%s", vhdName) 834 snap := env.getSnapshot() 835 storageAccount := snap.ecfg.storageAccountName() 836 mediaLink := gwacl.CreateVirtualHardDiskMediaLink(storageAccount, vhdPath) 837 // The disk label is optional and the disk name can be omitted if 838 // mediaLink is provided. 839 return gwacl.NewOSVirtualHardDisk("", "", "", mediaLink, sourceImageName, "Linux") 840 } 841 842 // getInitialEndpoints returns a slice of the endpoints every instance should have open 843 // (ssh port, etc). 844 func (env *azureEnviron) getInitialEndpoints(stateServer bool) []gwacl.InputEndpoint { 845 cfg := env.Config() 846 endpoints := []gwacl.InputEndpoint{{ 847 LocalPort: 22, 848 Name: "sshport", 849 Port: 22, 850 Protocol: "tcp", 851 }} 852 if stateServer { 853 endpoints = append(endpoints, []gwacl.InputEndpoint{{ 854 LocalPort: cfg.APIPort(), 855 Port: cfg.APIPort(), 856 Protocol: "tcp", 857 Name: "apiport", 858 }}...) 859 } 860 for i, endpoint := range endpoints { 861 endpoint.LoadBalancedEndpointSetName = endpoint.Name 862 endpoint.LoadBalancerProbe = &gwacl.LoadBalancerProbe{ 863 Port: endpoint.Port, 864 Protocol: "TCP", 865 } 866 endpoints[i] = endpoint 867 } 868 return endpoints 869 } 870 871 // newRole creates a gwacl.Role object (an Azure Virtual Machine) which uses 872 // the given Virtual Hard Drive. 873 // 874 // The VM will have: 875 // - an 'ubuntu' user defined with an unguessable (randomly generated) password 876 // - its ssh port (TCP 22) open 877 // (if a state server) 878 // - its state port (TCP mongoDB) port open 879 // - its API port (TCP) open 880 // 881 // roleSize is the name of one of Azure's machine types, e.g. ExtraSmall, 882 // Large, A6 etc. 883 func (env *azureEnviron) newRole(roleSize string, vhd *gwacl.OSVirtualHardDisk, userData string, stateServer bool) *gwacl.Role { 884 roleName := gwacl.MakeRandomRoleName("juju") 885 // Create a Linux Configuration with the username and the password 886 // empty and disable SSH with password authentication. 887 hostname := roleName 888 username := "ubuntu" 889 password := gwacl.MakeRandomPassword() 890 linuxConfigurationSet := gwacl.NewLinuxProvisioningConfigurationSet(hostname, username, password, userData, "true") 891 // Generate a Network Configuration with the initially required ports open. 892 networkConfigurationSet := gwacl.NewNetworkConfigurationSet(env.getInitialEndpoints(stateServer), nil) 893 role := gwacl.NewRole( 894 roleSize, roleName, vhd, 895 []gwacl.ConfigurationSet{*linuxConfigurationSet, *networkConfigurationSet}, 896 ) 897 role.AvailabilitySetName = "juju" 898 return role 899 } 900 901 // StopInstances is specified in the InstanceBroker interface. 902 func (env *azureEnviron) StopInstances(ids ...instance.Id) error { 903 snap := env.getSnapshot() 904 905 // Map services to role names we want to delete. 906 serviceInstances := make(map[string]map[string]bool) 907 var serviceNames []string 908 for _, id := range ids { 909 serviceName, roleName := env.splitInstanceId(id) 910 if roleName == "" { 911 serviceInstances[serviceName] = nil 912 serviceNames = append(serviceNames, serviceName) 913 } else { 914 deleteRoleNames, ok := serviceInstances[serviceName] 915 if !ok { 916 deleteRoleNames = make(map[string]bool) 917 serviceInstances[serviceName] = deleteRoleNames 918 serviceNames = append(serviceNames, serviceName) 919 } 920 deleteRoleNames[roleName] = true 921 } 922 } 923 924 // Load the properties of each service, so we know whether to 925 // delete the entire service. 926 // 927 // Note: concurrent operations on Affinity Groups have been 928 // found to cause conflict responses, so we do everything serially. 929 for _, serviceName := range serviceNames { 930 deleteRoleNames := serviceInstances[serviceName] 931 service, err := snap.api.GetHostedServiceProperties(serviceName, true) 932 if err != nil { 933 return err 934 } else if len(service.Deployments) != 1 { 935 continue 936 } 937 // Filter the instances that have no corresponding role. 938 roleNames := make(set.Strings) 939 for _, role := range service.Deployments[0].RoleList { 940 roleNames.Add(role.RoleName) 941 } 942 for roleName := range deleteRoleNames { 943 if !roleNames.Contains(roleName) { 944 delete(deleteRoleNames, roleName) 945 } 946 } 947 // If we're deleting all the roles, we need to delete the 948 // entire cloud service or we'll get an error. deleteRoleNames 949 // is nil if we're dealing with a legacy deployment. 950 if deleteRoleNames == nil || len(deleteRoleNames) == roleNames.Size() { 951 if err := snap.api.DeleteHostedService(serviceName); err != nil { 952 return err 953 } 954 } else { 955 for roleName := range deleteRoleNames { 956 if err := snap.api.DeleteRole(&gwacl.DeleteRoleRequest{ 957 ServiceName: serviceName, 958 DeploymentName: service.Deployments[0].Name, 959 RoleName: roleName, 960 DeleteMedia: true, 961 }); err != nil { 962 return err 963 } 964 } 965 } 966 } 967 return nil 968 } 969 970 // hostedServices returns all services for this environment. 971 func (env *azureEnviron) hostedServices() ([]gwacl.HostedServiceDescriptor, error) { 972 snap := env.getSnapshot() 973 services, err := snap.api.ListHostedServices() 974 if err != nil { 975 return nil, err 976 } 977 978 var filteredServices []gwacl.HostedServiceDescriptor 979 // Service names are prefixed with the environment name, followed by "-". 980 // We must be careful not to include services where the environment name 981 // is a substring of another name. ie we mustn't allow "azure" to match "azure-1". 982 envPrefix := env.getEnvPrefix() 983 // Just in case. 984 filterPrefix := regexp.QuoteMeta(envPrefix) 985 986 // Now filter the services. 987 prefixMatch := regexp.MustCompile("^" + filterPrefix + "[^-]*$") 988 for _, service := range services { 989 if prefixMatch.Match([]byte(service.ServiceName)) { 990 filteredServices = append(filteredServices, service) 991 } 992 } 993 return filteredServices, nil 994 } 995 996 // destroyAllServices destroys all Cloud Services and deployments contained. 997 // This is needed to clean up broken environments, in which there are cloud 998 // services with no deployments. 999 func (env *azureEnviron) destroyAllServices() error { 1000 services, err := env.hostedServices() 1001 if err != nil { 1002 return err 1003 } 1004 snap := env.getSnapshot() 1005 for _, service := range services { 1006 if err := snap.api.DeleteHostedService(service.ServiceName); err != nil { 1007 return err 1008 } 1009 } 1010 return nil 1011 } 1012 1013 // splitInstanceId splits the specified instance.Id into its 1014 // cloud-service and role parts. Both values will be empty 1015 // if the instance-id is non-matching, and role will be empty 1016 // for legacy instance-ids. 1017 func (env *azureEnviron) splitInstanceId(id instance.Id) (service, role string) { 1018 prefix := env.getEnvPrefix() 1019 if !strings.HasPrefix(string(id), prefix) { 1020 return "", "" 1021 } 1022 fields := strings.Split(string(id)[len(prefix):], "-") 1023 service = prefix + fields[0] 1024 if len(fields) > 1 { 1025 role = fields[1] 1026 } 1027 return service, role 1028 } 1029 1030 // Instances is specified in the Environ interface. 1031 func (env *azureEnviron) Instances(ids []instance.Id) ([]instance.Instance, error) { 1032 snap := env.getSnapshot() 1033 1034 type instanceId struct { 1035 serviceName, roleName string 1036 } 1037 1038 instancesIds := make([]instanceId, len(ids)) 1039 serviceNames := make(set.Strings) 1040 for i, id := range ids { 1041 serviceName, roleName := env.splitInstanceId(id) 1042 if serviceName == "" { 1043 continue 1044 } 1045 instancesIds[i] = instanceId{ 1046 serviceName: serviceName, 1047 roleName: roleName, 1048 } 1049 serviceNames.Add(serviceName) 1050 } 1051 1052 // Map service names to gwacl.HostedServices. 1053 services, err := snap.api.ListSpecificHostedServices(&gwacl.ListSpecificHostedServicesRequest{ 1054 ServiceNames: serviceNames.Values(), 1055 }) 1056 if err != nil { 1057 return nil, err 1058 } 1059 if len(services) == 0 { 1060 return nil, environs.ErrNoInstances 1061 } 1062 hostedServices := make(map[string]*gwacl.HostedService) 1063 for _, s := range services { 1064 hostedService, err := snap.api.GetHostedServiceProperties(s.ServiceName, true) 1065 if err != nil { 1066 return nil, err 1067 } 1068 hostedServices[s.ServiceName] = hostedService 1069 } 1070 1071 var validInstances int 1072 instances := make([]instance.Instance, len(ids)) 1073 for i, id := range instancesIds { 1074 if id.serviceName == "" { 1075 // Previously determined to be an invalid instance ID. 1076 continue 1077 } 1078 hostedService := hostedServices[id.serviceName] 1079 instance, err := snap.getInstance(hostedService, id.roleName) 1080 if err == nil { 1081 instances[i] = instance 1082 validInstances++ 1083 } else { 1084 logger.Debugf("failed to get instance for role %q in service %q: %v", id.roleName, hostedService.ServiceName, err) 1085 } 1086 } 1087 1088 switch validInstances { 1089 case len(instances): 1090 err = nil 1091 case 0: 1092 instances = nil 1093 err = environs.ErrNoInstances 1094 default: 1095 err = environs.ErrPartialInstances 1096 } 1097 return instances, err 1098 } 1099 1100 // AllInstances is specified in the InstanceBroker interface. 1101 func (env *azureEnviron) AllInstances() ([]instance.Instance, error) { 1102 // The instance list is built using the list of all the Azure 1103 // Services (instance==service). 1104 // Acquire management API object. 1105 snap := env.getSnapshot() 1106 1107 serviceDescriptors, err := env.hostedServices() 1108 if err != nil { 1109 return nil, err 1110 } 1111 1112 var instances []instance.Instance 1113 for _, sd := range serviceDescriptors { 1114 hostedService, err := snap.api.GetHostedServiceProperties(sd.ServiceName, true) 1115 if err != nil { 1116 return nil, err 1117 } else if len(hostedService.Deployments) != 1 { 1118 continue 1119 } 1120 deployment := &hostedService.Deployments[0] 1121 for _, role := range deployment.RoleList { 1122 instance, err := snap.getInstance(hostedService, role.RoleName) 1123 if err != nil { 1124 return nil, err 1125 } 1126 instances = append(instances, instance) 1127 } 1128 } 1129 return instances, nil 1130 } 1131 1132 // getEnvPrefix returns the prefix used to name the objects specific to this 1133 // environment. The environment prefix name is immutable, so there is no need 1134 // to use a configuration snapshot. 1135 func (env *azureEnviron) getEnvPrefix() string { 1136 return fmt.Sprintf("juju-%s-", env.Config().Name()) 1137 } 1138 1139 // Storage is specified in the Environ interface. 1140 func (env *azureEnviron) Storage() storage.Storage { 1141 return env.getSnapshot().storage 1142 } 1143 1144 // Destroy is specified in the Environ interface. 1145 func (env *azureEnviron) Destroy() error { 1146 logger.Debugf("destroying environment %q", env.Config().Name()) 1147 1148 // Stop all instances. 1149 if err := env.destroyAllServices(); err != nil { 1150 return fmt.Errorf("cannot destroy instances: %v", err) 1151 } 1152 1153 // Delete vnet and affinity group. Deleting the virtual network 1154 // may fail for inexplicable reasons (cannot delete in the Azure 1155 // console either for some amount of time after deleting dependent 1156 // VMs), so we only treat this as a warning. There is no cost 1157 // associated with a vnet or affinity group. 1158 if err := env.deleteVirtualNetwork(); err != nil { 1159 logger.Warningf("cannot delete the environment's virtual network: %v", err) 1160 } 1161 if err := env.deleteAffinityGroup(); err != nil { 1162 logger.Warningf("cannot delete the environment's affinity group: %v", err) 1163 } 1164 1165 // Delete storage. 1166 // Deleting the storage is done last so that if something fails 1167 // half way through the Destroy() method, the storage won't be cleaned 1168 // up and thus an attempt to re-boostrap the environment will lead to 1169 // a "error: environment is already bootstrapped" error. 1170 if err := env.Storage().RemoveAll(); err != nil { 1171 return fmt.Errorf("cannot clean up storage: %v", err) 1172 } 1173 return nil 1174 } 1175 1176 // OpenPorts is specified in the Environ interface. However, Azure does not 1177 // support the global firewall mode. 1178 func (env *azureEnviron) OpenPorts(ports []network.PortRange) error { 1179 return nil 1180 } 1181 1182 // ClosePorts is specified in the Environ interface. However, Azure does not 1183 // support the global firewall mode. 1184 func (env *azureEnviron) ClosePorts(ports []network.PortRange) error { 1185 return nil 1186 } 1187 1188 // Ports is specified in the Environ interface. 1189 func (env *azureEnviron) Ports() ([]network.PortRange, error) { 1190 // TODO: implement this. 1191 return []network.PortRange{}, nil 1192 } 1193 1194 // Provider is specified in the Environ interface. 1195 func (env *azureEnviron) Provider() environs.EnvironProvider { 1196 return azureEnvironProvider{} 1197 } 1198 1199 var ( 1200 retryPolicy = gwacl.RetryPolicy{ 1201 NbRetries: 6, 1202 HttpStatusCodes: []int{ 1203 http.StatusConflict, 1204 http.StatusRequestTimeout, 1205 http.StatusInternalServerError, 1206 http.StatusServiceUnavailable, 1207 }, 1208 Delay: 10 * time.Second} 1209 ) 1210 1211 // updateStorageAccountKey queries the storage account key, and updates the 1212 // version cached in env.storageAccountKey. 1213 // 1214 // It takes a snapshot in order to preserve transactional integrity relative 1215 // to the snapshot's starting state, without having to lock the environment 1216 // for the duration. If there is a conflicting change to env relative to the 1217 // state recorded in the snapshot, this function will fail. 1218 func (env *azureEnviron) updateStorageAccountKey(snapshot *azureEnviron) (string, error) { 1219 // This method follows an RCU pattern, an optimistic technique to 1220 // implement atomic read-update transactions: get a consistent snapshot 1221 // of state; process data; enter critical section; check for conflicts; 1222 // write back changes. The advantage is that there are no long-held 1223 // locks, in particular while waiting for the request to Azure to 1224 // complete. 1225 // "Get a consistent snapshot of state" is the caller's responsibility. 1226 // The caller can use env.getSnapshot(). 1227 1228 // Process data: get a current account key from Azure. 1229 key, err := env.queryStorageAccountKey() 1230 if err != nil { 1231 return "", err 1232 } 1233 1234 // Enter critical section. 1235 env.Lock() 1236 defer env.Unlock() 1237 1238 // Check for conflicts: is the config still what it was? 1239 if env.ecfg != snapshot.ecfg { 1240 // The environment has been reconfigured while we were 1241 // working on this, so the key we just get may not be 1242 // appropriate any longer. So fail. 1243 // Whatever we were doing isn't likely to be right any more 1244 // anyway. Otherwise, it might be worth returning the key 1245 // just in case it still works, and proceed without updating 1246 // env.storageAccountKey. 1247 return "", fmt.Errorf("environment was reconfigured") 1248 } 1249 1250 // Write back changes. 1251 env.storageAccountKey = key 1252 return key, nil 1253 } 1254 1255 // getStorageContext obtains a context object for interfacing with Azure's 1256 // storage API. 1257 // For now, each invocation just returns a separate object. This is probably 1258 // wasteful (each context gets its own SSL connection) and may need optimizing 1259 // later. 1260 func (env *azureEnviron) getStorageContext() (*gwacl.StorageContext, error) { 1261 snap := env.getSnapshot() 1262 key := snap.storageAccountKey 1263 if key == "" { 1264 // We don't know the storage-account key yet. Request it. 1265 var err error 1266 key, err = env.updateStorageAccountKey(snap) 1267 if err != nil { 1268 return nil, err 1269 } 1270 } 1271 context := gwacl.StorageContext{ 1272 Account: snap.ecfg.storageAccountName(), 1273 Key: key, 1274 AzureEndpoint: gwacl.GetEndpoint(snap.ecfg.location()), 1275 RetryPolicy: retryPolicy, 1276 } 1277 return &context, nil 1278 } 1279 1280 // TODO(ericsnow) lp-1398055 1281 // Implement the ZonedEnviron interface. 1282 1283 // Region is specified in the HasRegion interface. 1284 func (env *azureEnviron) Region() (simplestreams.CloudSpec, error) { 1285 ecfg := env.getSnapshot().ecfg 1286 return simplestreams.CloudSpec{ 1287 Region: ecfg.location(), 1288 Endpoint: string(gwacl.GetEndpoint(ecfg.location())), 1289 }, nil 1290 } 1291 1292 // SupportsUnitPlacement is specified in the state.EnvironCapability interface. 1293 func (env *azureEnviron) SupportsUnitPlacement() error { 1294 if env.getSnapshot().ecfg.availabilitySetsEnabled() { 1295 return fmt.Errorf("unit placement is not supported with availability-sets-enabled") 1296 } 1297 return nil 1298 }