// Source: github.com/juju/juju@v0.0.0-20240430160146-1752b71fcf00/provider/azure/environ.go

// Copyright 2015 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package azure

import (
	stdcontext "context"
	"fmt"
	"net/url"
	"sort"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/Azure/azure-sdk-for-go/sdk/azcore"
	"github.com/Azure/azure-sdk-for-go/sdk/azcore/arm"
	"github.com/Azure/azure-sdk-for-go/sdk/azcore/policy"
	"github.com/Azure/azure-sdk-for-go/sdk/azcore/to"
	"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v2"
	"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork"
	"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources"
	"github.com/juju/collections/set"
	"github.com/juju/errors"
	"github.com/juju/names/v5"
	"github.com/juju/retry"
	"github.com/juju/version/v2"

	"github.com/juju/juju/agent"
	"github.com/juju/juju/cloudconfig/instancecfg"
	"github.com/juju/juju/cloudconfig/providerinit"
	"github.com/juju/juju/core/arch"
	"github.com/juju/juju/core/constraints"
	"github.com/juju/juju/core/instance"
	"github.com/juju/juju/core/os/ostype"
	"github.com/juju/juju/environs"
	environscloudspec "github.com/juju/juju/environs/cloudspec"
	"github.com/juju/juju/environs/config"
	"github.com/juju/juju/environs/context"
	"github.com/juju/juju/environs/instances"
	"github.com/juju/juju/environs/simplestreams"
	"github.com/juju/juju/environs/tags"
	"github.com/juju/juju/provider/azure/internal/armtemplates"
	"github.com/juju/juju/provider/azure/internal/azureauth"
	"github.com/juju/juju/provider/azure/internal/errorutils"
	"github.com/juju/juju/provider/azure/internal/tracing"
	"github.com/juju/juju/provider/common"
	"github.com/juju/juju/tools"
	jujuversion "github.com/juju/juju/version"
)

const (
	// jujuMachineNameTag is the tag key under which the VM name is
	// recorded on the resources created for a machine.
	jujuMachineNameTag = tags.JujuTagPrefix + "machine-name"

	// minRootDiskSize is the minimum root disk size Azure
	// accepts for a VM's OS disk.
	// It will be used if none is specified by the user.
	minRootDiskSize = 30 * 1024 // 30 GiB

	// serviceErrorCodeDeploymentCannotBeCancelled is the error code for
	// service errors in response to an attempt to cancel a deployment
	// that cannot be cancelled.
	serviceErrorCodeDeploymentCannotBeCancelled = "DeploymentCannotBeCancelled"

	// serviceErrorCodeResourceGroupBeingDeleted is the error code for
	// service errors in response to an attempt to cancel a deployment
	// that has already started to be deleted.
	serviceErrorCodeResourceGroupBeingDeleted = "ResourceGroupBeingDeleted"

	// controllerAvailabilitySet is the name of the availability set
	// used for controller machines.
	controllerAvailabilitySet = "juju-controller"

	// commonDeployment is used to create resources common to all models.
	commonDeployment = "common"

	// computeAPIVersion and networkAPIVersion are the API versions set on
	// the compute and network resources placed in deployment templates.
	computeAPIVersion = "2021-11-01"
	networkAPIVersion = "2018-08-01"
)

// azureEnviron is the Azure implementation of environs.Environ.
type azureEnviron struct {
	environs.NoSpaceDiscoveryEnviron

	// provider is the azureEnvironProvider used to open this environment.
	provider *azureEnvironProvider

	// cloud defines the cloud configuration for this environment.
	cloud environscloudspec.CloudSpec

	// location is the canonical location name. Use this instead
	// of cloud.Region in API calls.
	location string

	// subscriptionId is the Azure account subscription ID.
	subscriptionId string

	// tenantId is the Azure account tenant ID.
	tenantId string

	// storageEndpoint is the Azure storage endpoint. This is the host
	// portion of the storage endpoint URL only; use this instead of
	// cloud.StorageEndpoint in API calls.
	storageEndpoint string

	// resourceGroup is the name of the Resource Group in the Azure
	// subscription that corresponds to the environment.
	resourceGroup string

	// modelName is the name of the model.
	modelName string

	// namespace is used to create the machine and device hostnames.
	namespace instance.Namespace

	// clientOptions and credential are populated by initEnviron.
	clientOptions policy.ClientOptions
	credential    azcore.TokenCredential

	// mu is taken by Config and SetConfig when reading or replacing
	// config, and by waitCommonResourcesCreated when checking
	// commonResourcesCreated.
	mu                     sync.Mutex
	config                 *azureModelConfig
	instanceTypes          map[string]instances.InstanceType
	commonResourcesCreated bool
}

var _ environs.Environ = (*azureEnviron)(nil)

// SetCloudSpec is specified in the environs.Environ interface.
//
// It validates the cloud spec, records the canonical location and the host
// part of the storage endpoint, initialises the Azure client options and
// credential via initEnviron, and finally settles the resource group and
// model names.
//
// NOTE(review): env.config is dereferenced below, so this presumably requires
// SetConfig to have been called beforehand — confirm against callers.
func (env *azureEnviron) SetCloudSpec(ctx stdcontext.Context, cloud environscloudspec.CloudSpec) error {
	if err := validateCloudSpec(cloud); err != nil {
		return errors.Annotate(err, "validating cloud spec")
	}

	env.mu.Lock()
	defer env.mu.Unlock()

	// The Azure storage code wants the endpoint host only, not the URL.
	storageEndpointURL, err := url.Parse(cloud.StorageEndpoint)
	if err != nil {
		return errors.Annotate(err, "parsing storage endpoint URL")
	}
	env.cloud = cloud
	env.location = canonicalLocation(cloud.Region)
	env.storageEndpoint = storageEndpointURL.Host

	if err := env.initEnviron(ctx); err != nil {
		return errors.Trace(err)
	}

	// A resource group already set on the environ wins; otherwise fall
	// back to the one named in the model config.
	cfg := env.config
	if env.resourceGroup == "" {
		env.resourceGroup = cfg.resourceGroupName
	}
	// If no user specified resource group, make one from the model UUID.
	if env.resourceGroup == "" {
		modelTag := names.NewModelTag(cfg.UUID())
		if env.resourceGroup, err = env.resourceGroupName(ctx, modelTag, cfg.Name()); err != nil {
			return errors.Trace(err)
		}
	}
	env.modelName = cfg.Name()
	return nil
}

// initEnviron derives the subscription ID, client options, tenant ID and
// token credential from env.cloud and the provider configuration, storing
// them on env.
func (env *azureEnviron) initEnviron(ctx stdcontext.Context) error {
	// Prefer the managed subscription ID attribute; fall back to the
	// plain subscription ID when it is not set.
	credAttrs := env.cloud.Credential.Attributes()
	env.subscriptionId = credAttrs[credAttrManagedSubscriptionId]
	if env.subscriptionId == "" {
		env.subscriptionId = credAttrs[credAttrSubscriptionId]
	}

	env.clientOptions = azcore.ClientOptions{
		Cloud: azureCloud(env.cloud.Name, env.cloud.Endpoint, env.cloud.IdentityEndpoint),
		PerCallPolicies: []policy.Policy{
			&tracing.LoggingPolicy{
				Logger: logger.Child("azureapi"),
			},
		},
		Telemetry: policy.TelemetryOptions{
			ApplicationID: "Juju/" + jujuversion.Current.String(),
		},
		Transport: env.provider.config.Sender,
		Retry:     env.provider.config.Retry,
	}
	// The request inspector is optional; when configured it is added
	// after the logging policy.
	if env.provider.config.RequestInspector != nil {
		env.clientOptions.PerCallPolicies = append(env.clientOptions.PerCallPolicies, env.provider.config.RequestInspector)
	}

	tenantID, err := azureauth.DiscoverTenantID(ctx, env.subscriptionId, arm.ClientOptions{
		ClientOptions: env.clientOptions,
	})
	if err != nil {
		return errors.Annotate(err, "getting tenant ID")
	}
	logger.Debugf("discovered tenant id: %s", tenantID)
	env.tenantId = tenantID

	appId := credAttrs[credAttrAppId]
	appPassword := credAttrs[credAttrAppPassword]
	env.credential, err = env.provider.config.CreateTokenCredential(appId, appPassword, tenantID, env.clientOptions)
	if err != nil {
		return errors.Annotate(err, "set up credential")
	}
	return nil
}

// PrepareForBootstrap is part of the Environ interface.
206 func (env *azureEnviron) PrepareForBootstrap(ctx environs.BootstrapContext, _ string) error { 207 if ctx.ShouldVerifyCredentials() { 208 cloudCtx := &context.CloudCallContext{ 209 Context: ctx.Context(), 210 InvalidateCredentialFunc: func(string) error { return nil }, 211 } 212 if err := verifyCredentials(env, cloudCtx); err != nil { 213 return errors.Trace(err) 214 } 215 } 216 return nil 217 } 218 219 // Create is part of the Environ interface. 220 func (env *azureEnviron) Create(ctx context.ProviderCallContext, args environs.CreateParams) error { 221 if err := verifyCredentials(env, ctx); err != nil { 222 return errors.Trace(err) 223 } 224 return errors.Trace(env.initResourceGroup(ctx, args.ControllerUUID, env.config.resourceGroupName != "", false)) 225 } 226 227 // Bootstrap is part of the Environ interface. 228 func (env *azureEnviron) Bootstrap( 229 ctx environs.BootstrapContext, 230 callCtx context.ProviderCallContext, 231 args environs.BootstrapParams, 232 ) (*environs.BootstrapResult, error) { 233 if err := env.initResourceGroup(callCtx, args.ControllerConfig.ControllerUUID(), env.config.resourceGroupName != "", true); err != nil { 234 return nil, errors.Annotate(err, "creating controller resource group") 235 } 236 result, err := common.Bootstrap(ctx, env, callCtx, args) 237 if err != nil { 238 logger.Errorf("bootstrap failed, destroying model: %v", err) 239 240 // First cancel the in-progress deployment. 
241 var wg sync.WaitGroup 242 var cancelResult error 243 logger.Debugf("canceling deployment for bootstrap instance") 244 wg.Add(1) 245 go func(id string) { 246 defer wg.Done() 247 cancelResult = errors.Annotatef( 248 env.cancelDeployment(callCtx, id), 249 "canceling deployment %q", id, 250 ) 251 }(names.NewMachineTag(agent.BootstrapControllerId).String()) 252 wg.Wait() 253 if cancelResult != nil && !errors.IsNotFound(cancelResult) { 254 return nil, errors.Annotate(cancelResult, "aborting failed bootstrap") 255 } 256 257 // Then cleanup the resource group. 258 if err := env.Destroy(callCtx); err != nil { 259 logger.Errorf("failed to destroy model: %v", err) 260 } 261 return nil, errors.Trace(err) 262 } 263 return result, nil 264 } 265 266 // initResourceGroup creates a resource group for this environment. 267 func (env *azureEnviron) initResourceGroup(ctx context.ProviderCallContext, controllerUUID string, existingResourceGroup, controller bool) error { 268 env.mu.Lock() 269 resourceTags := tags.ResourceTags( 270 names.NewModelTag(env.config.Config.UUID()), 271 names.NewControllerTag(controllerUUID), 272 env.config, 273 ) 274 env.mu.Unlock() 275 276 resourceGroups, err := env.resourceGroupsClient() 277 if err != nil { 278 return errors.Trace(err) 279 } 280 if existingResourceGroup { 281 logger.Debugf("using existing resource group %q for model %q", env.resourceGroup, env.modelName) 282 g, err := resourceGroups.Get(ctx, env.resourceGroup, nil) 283 if err != nil { 284 return errorutils.HandleCredentialError(errors.Annotatef(err, "checking resource group %q", env.resourceGroup), ctx) 285 } 286 if region := toValue(g.Location); region != env.location { 287 return errors.Errorf("cannot use resource group in region %q when operating in region %q", region, env.location) 288 } 289 } else { 290 logger.Debugf("creating resource group %q for model %q", env.resourceGroup, env.modelName) 291 if _, err := resourceGroups.CreateOrUpdate(ctx, env.resourceGroup, 
armresources.ResourceGroup{ 292 Location: to.Ptr(env.location), 293 Tags: toMapPtr(resourceTags), 294 }, nil); err != nil { 295 return errorutils.HandleCredentialError(errors.Annotate(err, "creating resource group"), ctx) 296 } 297 } 298 299 if !controller { 300 // When we create a resource group for a non-controller model, 301 // we must create the common resources up-front. This is so 302 // that parallel deployments do not affect dynamic changes, 303 // e.g. those made by the firewaller. For the controller model, 304 // we fold the creation of these resources into the bootstrap 305 // machine's deployment. 306 if err := env.createCommonResourceDeployment(ctx, resourceTags, nil); err != nil { 307 return errors.Trace(err) 308 } 309 } 310 311 return nil 312 } 313 314 func (env *azureEnviron) createCommonResourceDeployment( 315 ctx context.ProviderCallContext, 316 tags map[string]string, 317 rules []*armnetwork.SecurityRule, 318 commonResources ...armtemplates.Resource, 319 ) error { 320 // Only create network resources if the user has not 321 // specified their own to use. 322 if env.config.virtualNetworkName == "" { 323 networkResources, _ := networkTemplateResources(env.location, tags, nil, rules) 324 commonResources = append(commonResources, networkResources...) 325 } 326 if len(commonResources) == 0 { 327 return nil 328 } 329 330 template := armtemplates.Template{Resources: commonResources} 331 if err := env.createDeployment( 332 ctx, 333 env.resourceGroup, 334 commonDeployment, 335 template, 336 ); err != nil { 337 return errors.Trace(err) 338 } 339 return nil 340 } 341 342 // ControllerInstances is specified in the Environ interface. 
343 func (env *azureEnviron) ControllerInstances(ctx context.ProviderCallContext, controllerUUID string) ([]instance.Id, error) { 344 inst, err := env.allInstances(ctx, env.resourceGroup, false, controllerUUID) 345 if err != nil { 346 return nil, err 347 } 348 if len(inst) == 0 { 349 return nil, environs.ErrNoInstances 350 } 351 ids := make([]instance.Id, len(inst)) 352 for i, inst := range inst { 353 ids[i] = inst.Id() 354 } 355 return ids, nil 356 } 357 358 // Config is specified in the Environ interface. 359 func (env *azureEnviron) Config() *config.Config { 360 env.mu.Lock() 361 defer env.mu.Unlock() 362 return env.config.Config 363 } 364 365 // SetConfig is specified in the Environ interface. 366 func (env *azureEnviron) SetConfig(cfg *config.Config) error { 367 env.mu.Lock() 368 defer env.mu.Unlock() 369 370 var old *config.Config 371 if env.config != nil { 372 old = env.config.Config 373 } 374 ecfg, err := validateConfig(cfg, old) 375 if err != nil { 376 return err 377 } 378 env.config = ecfg 379 380 return nil 381 } 382 383 var unsupportedConstraints = []string{ 384 constraints.CpuPower, 385 constraints.Tags, 386 constraints.VirtType, 387 constraints.ImageID, 388 } 389 390 // ConstraintsValidator is defined on the Environs interface. 
391 func (env *azureEnviron) ConstraintsValidator(ctx context.ProviderCallContext) (constraints.Validator, error) { 392 instanceTypes, err := env.getInstanceTypes(ctx) 393 if err != nil { 394 return nil, err 395 } 396 instTypeNames := make([]string, 0, len(instanceTypes)) 397 for instTypeName := range instanceTypes { 398 instTypeNames = append(instTypeNames, instTypeName) 399 } 400 sort.Strings(instTypeNames) 401 402 validator := constraints.NewValidator() 403 validator.RegisterUnsupported(unsupportedConstraints) 404 validator.RegisterVocabulary( 405 constraints.Arch, 406 []string{arch.AMD64}, 407 ) 408 validator.RegisterVocabulary( 409 constraints.InstanceType, 410 instTypeNames, 411 ) 412 validator.RegisterConflicts( 413 []string{constraints.InstanceType}, 414 []string{ 415 constraints.Mem, 416 constraints.Cores, 417 // TODO: move to a dynamic conflict for arch when azure supports more than amd64 418 //constraints.Arch, 419 }, 420 ) 421 return validator, nil 422 } 423 424 // PrecheckInstance is defined on the environs.InstancePrechecker interface. 425 func (env *azureEnviron) PrecheckInstance(ctx context.ProviderCallContext, args environs.PrecheckInstanceParams) error { 426 if _, err := env.findPlacementSubnet(ctx, args.Placement); err != nil { 427 return errors.Trace(err) 428 } 429 if !args.Constraints.HasInstanceType() { 430 return nil 431 } 432 // Constraint has an instance-type constraint so let's see if it is valid. 433 instanceTypes, err := env.getInstanceTypes(ctx) 434 if err != nil { 435 return err 436 } 437 for _, instanceType := range instanceTypes { 438 if instanceType.Name == *args.Constraints.InstanceType { 439 return nil 440 } 441 } 442 return fmt.Errorf("invalid instance type %q", *args.Constraints.InstanceType) 443 } 444 445 // StartInstance is specified in the InstanceBroker interface. 
func (env *azureEnviron) StartInstance(ctx context.ProviderCallContext, args environs.StartInstanceParams) (*environs.StartInstanceResult, error) {
	if args.ControllerUUID == "" {
		return nil, errors.New("missing controller UUID")
	}

	// Get the required configuration and config-dependent information
	// required to create the instance. We take the lock just once, to
	// ensure we obtain all information based on the same configuration.
	env.mu.Lock()
	envTags := tags.ResourceTags(
		names.NewModelTag(env.config.Config.UUID()),
		names.NewControllerTag(args.ControllerUUID),
		env.config,
	)
	imageStream := env.config.ImageStream()
	envInstanceTypes, err := env.getInstanceTypesLocked(ctx)
	if err != nil {
		env.mu.Unlock()
		return nil, errors.Trace(err)
	}
	// Work on a private copy: entries are removed from it below when a
	// quota error is hit, and that must not alter the environ's own map.
	instanceTypes := make(map[string]instances.InstanceType)
	for k, v := range envInstanceTypes {
		instanceTypes[k] = v
	}
	env.mu.Unlock()

	// If the user has not specified a root-disk size, then
	// set a sensible default.
	var rootDisk uint64
	// Azure complains if we try and specify a root disk size less than the minimum.
	// See http://pad.lv/1645408
	if args.Constraints.RootDisk != nil && *args.Constraints.RootDisk > minRootDiskSize {
		rootDisk = *args.Constraints.RootDisk
	} else {
		rootDisk = minRootDiskSize
		args.Constraints.RootDisk = &rootDisk
	}
	// Start the instance - if we get a quota error, that instance type is ignored
	// and we'll try the next most expensive one, up to a reasonable number of attempts.
	// Note: the local "arch" shadows the imported arch package for the
	// remainder of this function.
	arch, err := args.Tools.OneArch()
	if err != nil {
		return nil, errors.Trace(err)
	}
	for i := 0; i < 15; i++ {
		// Identify the instance type and image to provision.
		instanceSpec, err := env.findInstanceSpec(
			ctx,
			instanceTypes,
			&instances.InstanceConstraint{
				Region:      env.location,
				Base:        args.InstanceConfig.Base,
				Arch:        arch,
				Constraints: args.Constraints,
			},
			imageStream,
		)
		if err != nil {
			return nil, err
		}
		if rootDisk < instanceSpec.InstanceType.RootDisk {
			// The InstanceType's RootDisk is set to the maximum
			// OS disk size; override it with the user-specified
			// or default root disk size.
			instanceSpec.InstanceType.RootDisk = rootDisk
		}
		result, err := env.startInstance(ctx, args, instanceSpec, envTags)
		quotaErr, ok := errorutils.MaybeQuotaExceededError(err)
		if ok {
			logger.Warningf("%v quota exceeded error: %q", instanceSpec.InstanceType.Name, quotaErr.Error())
			// Drop the offending type from the candidates before
			// retrying (presumably its whole family, going by the
			// helper's name — confirm).
			deleteInstanceFamily(instanceTypes, instanceSpec.InstanceType.Name)
			continue
		}
		// Any other outcome, success included, ends the loop.
		return result, errorutils.SimpleError(err)
	}
	return nil, errors.New("no suitable instance type found for this subscription")
}

// startInstance provisions one virtual machine for the given instance spec:
// it selects agent binaries matching the image architecture, finalises the
// instance config, creates the VM deployment, and returns the new instance
// together with its hardware characteristics. If the deployment fails, an
// attempt is made to stop the partially created instance before the error
// is returned.
func (env *azureEnviron) startInstance(
	ctx context.ProviderCallContext, args environs.StartInstanceParams,
	instanceSpec *instances.InstanceSpec, envTags map[string]string,
) (*environs.StartInstanceResult, error) {

	// Pick tools by filtering the available tools down to the architecture of
	// the image that will be provisioned.
	selectedTools, err := args.Tools.Match(tools.Filter{
		Arch: instanceSpec.Image.Arch,
	})
	if err != nil {
		return nil, errors.Trace(err)
	}
	logger.Infof("picked agent binaries %q", selectedTools[0].Version)

	// Finalize the instance config, which we'll render to CustomData below.
	if err := args.InstanceConfig.SetTools(selectedTools); err != nil {
		return nil, errors.Trace(err)
	}
	if err := instancecfg.FinishInstanceConfig(
		args.InstanceConfig, env.Config(),
	); err != nil {
		return nil, err
	}

	vmName, err := env.namespace.Hostname(args.InstanceConfig.MachineId)
	if err != nil {
		return nil, errors.Trace(err)
	}

	// Copy the tags so that adding the machine-name tag below does not
	// modify the caller's instance config.
	vmTags := make(map[string]string)
	for k, v := range args.InstanceConfig.Tags {
		vmTags[k] = v
	}
	// jujuMachineNameTag identifies the VM name, in which is encoded
	// the Juju machine name. We tag all resources related to the
	// machine with this.
	vmTags[jujuMachineNameTag] = vmName

	// Use a public IP by default unless a constraint
	// explicitly forbids it.
	usePublicIP := true
	if args.Constraints.HasAllocatePublicIP() {
		usePublicIP = *args.Constraints.AllocatePublicIP
	}
	err = env.createVirtualMachine(
		ctx, vmName, vmTags, envTags,
		instanceSpec, args, usePublicIP, true,
	)
	// If there's a conflict, it's because another machine is
	// being provisioned with the same availability set so
	// retry and do not create the availability set.
	if errorutils.IsConflictError(err) {
		logger.Debugf("conflict creating %s, retrying...", vmName)
		err = env.createVirtualMachine(
			ctx, vmName, vmTags, envTags,
			instanceSpec, args, usePublicIP, false,
		)
	}
	if err != nil {
		logger.Debugf("creating instance failed, destroying: %v", err)
		if err := env.StopInstances(ctx, instance.Id(vmName)); err != nil {
			logger.Errorf("could not destroy failed virtual machine: %v", err)
		}
		return nil, errors.Annotatef(err, "creating virtual machine %q", vmName)
	}

	// Note: the instance is initialised without addresses to keep the
	// API chatter down. We will refresh the instance if we need to know
	// the addresses.
	inst := &azureInstance{
		vmName:            vmName,
		provisioningState: armresources.ProvisioningStateCreating,
		env:               env,
	}
	// The reported architecture is always amd64, regardless of the
	// selected image.
	amd64 := arch.AMD64
	hc := &instance.HardwareCharacteristics{
		Arch:     &amd64,
		Mem:      &instanceSpec.InstanceType.Mem,
		RootDisk: &instanceSpec.InstanceType.RootDisk,
		CpuCores: &instanceSpec.InstanceType.CpuCores,
	}
	return &environs.StartInstanceResult{
		Instance: inst,
		Hardware: hc,
	}, nil
}

// referenceInfo splits a reference to an Azure entity into an
// optional resource group and name, or just name if no
// resource group is specified.
//
// Only the first two "/"-separated components are returned; any further
// components are ignored.
func referenceInfo(entityRef string) (entityRG, entityName string) {
	parts := strings.Split(entityRef, "/")
	if len(parts) == 1 {
		return "", entityRef
	}
	return parts[0], parts[1]
}

// createVirtualMachine creates a virtual machine and related resources.
//
// All resources created are tagged with the specified "vmTags", so if
// this function fails then all resources can be deleted by tag.
//
// The resources are collected into a single template and submitted as one
// deployment named after the VM. The template holds, as applicable: the
// network resources (bootstrap only), the availability set, a public IP
// address, one NIC per subnet, the VM itself and, on CentOS, a VM extension.
// When createAvailabilitySet is false the availability set is expected to
// exist already and is only looked up.
func (env *azureEnviron) createVirtualMachine(
	ctx context.ProviderCallContext,
	vmName string,
	vmTags, envTags map[string]string,
	instanceSpec *instances.InstanceSpec,
	args environs.StartInstanceParams,
	usePublicIP bool,
	createAvailabilitySet bool,
) error {
	// Work out the API ports: controllers use the configured API port
	// (plus 80 for autocert); other machines must have exactly one port
	// in their API info.
	instanceConfig := args.InstanceConfig
	apiPorts := make([]int, 0, 2)
	if instanceConfig.IsController() {
		apiPorts = append(apiPorts, instanceConfig.ControllerConfig.APIPort())
		if instanceConfig.ControllerConfig.AutocertDNSName() != "" {
			// Open port 80 as well as it handles Let's Encrypt HTTP challenge.
			apiPorts = append(apiPorts, 80)
		}
	} else {
		ports := instanceConfig.APIInfo.Ports()
		if len(ports) != 1 {
			return errors.Errorf("expected one API port, found %v", ports)
		}
		apiPorts = append(apiPorts, ports[0])
	}

	// nicDependsOn and vmDependsOn accumulate the template resource IDs
	// that the NIC and VM resources must wait for; res accumulates the
	// template resources themselves.
	var nicDependsOn, vmDependsOn []string
	var res []armtemplates.Resource
	bootstrapping := instanceConfig.Bootstrap != nil
	// We only need to deal with creating network resources
	// if the user has not specified their own to use.
	if bootstrapping && env.config.virtualNetworkName == "" && args.Placement == "" {
		// We're starting the bootstrap machine, so we will create the
		// networking resources in the same deployment.
		networkResources, dependsOn := networkTemplateResources(env.location, envTags, apiPorts, nil)
		res = append(res, networkResources...)
		nicDependsOn = append(nicDependsOn, dependsOn...)
	}
	if !bootstrapping {
		// Wait for the common resource deployment to complete.
		if err := env.waitCommonResourcesCreated(ctx); err != nil {
			return errors.Annotate(
				err, "waiting for common resources to be created",
			)
		}
	}

	osProfile, seriesOS, err := newOSProfile(
		vmName, instanceConfig,
		env.provider.config.GenerateSSHKey,
	)
	if err != nil {
		return errors.Annotate(err, "creating OS profile")
	}
	storageProfile, err := newStorageProfile(
		vmName,
		instanceSpec,
	)
	if err != nil {
		return errors.Annotate(err, "creating storage profile")
	}
	diskEncryptionID, err := env.diskEncryptionInfo(ctx, args.RootDisk, envTags)
	if err != nil {
		return environs.ZoneIndependentError(fmt.Errorf("creating disk encryption info: %w", err))
	}
	// Attach the disk encryption set only when one was resolved and the
	// OS disk is a managed disk.
	if diskEncryptionID != "" && storageProfile.OSDisk.ManagedDisk != nil {
		storageProfile.OSDisk.ManagedDisk.DiskEncryptionSet = &armcompute.DiskEncryptionSetParameters{
			ID: to.Ptr(diskEncryptionID),
		}
	}

	// An empty availability set name means the VM is not placed in an
	// availability set; the sub-resource then stays nil.
	var availabilitySetSubResource *armcompute.SubResource
	availabilitySetName, err := availabilitySetName(
		vmName, vmTags, instanceConfig.IsController(),
	)
	if err != nil {
		return errors.Annotate(err, "getting availability set name")
	}
	availabilitySetId := fmt.Sprintf(
		`[resourceId('Microsoft.Compute/availabilitySets','%s')]`,
		availabilitySetName,
	)
	if availabilitySetName != "" {
		availabilitySetSubResource = &armcompute.SubResource{
			ID: to.Ptr(availabilitySetId),
		}
	}
	if !createAvailabilitySet && availabilitySetName != "" {
		// The caller asked us not to create the availability set, so
		// it must already exist.
		availabilitySet, err := env.availabilitySetsClient()
		if err != nil {
			return errors.Trace(err)
		}
		if _, err = availabilitySet.Get(ctx, env.resourceGroup, availabilitySetName, nil); err != nil {
			return errors.Annotatef(err, "expecting availability set %q to be available", availabilitySetName)
		}
	}
	if createAvailabilitySet && availabilitySetName != "" {
		availabilitySetProperties := &armcompute.AvailabilitySetProperties{
			// Azure complains when the fault domain count
			// is not specified, even though it is meant
			// to be optional and default to the maximum.
			// The maximum depends on the location, and
			// there is no API to query it.
			PlatformFaultDomainCount: to.Ptr(maxFaultDomains(env.location)),
		}
		res = append(res, armtemplates.Resource{
			APIVersion: computeAPIVersion,
			Type:       "Microsoft.Compute/availabilitySets",
			Name:       availabilitySetName,
			Location:   env.location,
			Tags:       envTags,
			Properties: availabilitySetProperties,
			Sku:        &armtemplates.Sku{Name: "Aligned"},
		})
		vmDependsOn = append(vmDependsOn, availabilitySetId)
	}

	placementSubnetID, err := env.findPlacementSubnet(ctx, args.Placement)
	if err != nil {
		return environs.ZoneIndependentError(err)
	}
	vnetId, subnetIds, err := env.networkInfoForInstance(ctx, args, bootstrapping, instanceConfig.IsController(), placementSubnetID)
	if err != nil {
		return environs.ZoneIndependentError(err)
	}
	logger.Debugf("creating instance using vnet %v, subnets %q", vnetId, subnetIds)

	// When bootstrapping without a user-specified network, the NICs
	// also depend on the vnet.
	if env.config.virtualNetworkName == "" && bootstrapping {
		nicDependsOn = append(nicDependsOn, vnetId)
	}

	var publicIPAddressId string
	if usePublicIP {
		publicIPAddressName := vmName + "-public-ip"
		publicIPAddressId = fmt.Sprintf(`[resourceId('Microsoft.Network/publicIPAddresses', '%s')]`, publicIPAddressName)
		// Default to static public IP so address is preserved across reboots.
		publicIPAddressAllocationMethod := armnetwork.IPAllocationMethodStatic
		if env.config.loadBalancerSkuName == string(armnetwork.LoadBalancerSKUNameBasic) {
			publicIPAddressAllocationMethod = armnetwork.IPAllocationMethodDynamic // preserve the settings that were used in Juju 2.4 and earlier
		}
		res = append(res, armtemplates.Resource{
			APIVersion: networkAPIVersion,
			Type:       "Microsoft.Network/publicIPAddresses",
			Name:       publicIPAddressName,
			Location:   env.location,
			Tags:       vmTags,
			Sku:        &armtemplates.Sku{Name: env.config.loadBalancerSkuName},
			Properties: &armnetwork.PublicIPAddressPropertiesFormat{
				PublicIPAddressVersion:   to.Ptr(armnetwork.IPVersionIPv4),
				PublicIPAllocationMethod: to.Ptr(publicIPAddressAllocationMethod),
			},
		})
	}

	// Create one NIC per subnet. The first one is the primary and has
	// the public IP address if so configured.
	var nics []*armcompute.NetworkInterfaceReference
	for i, subnetID := range subnetIds {
		primary := i == 0
		ipConfig := &armnetwork.InterfaceIPConfigurationPropertiesFormat{
			Primary:                   to.Ptr(primary),
			PrivateIPAllocationMethod: to.Ptr(armnetwork.IPAllocationMethodDynamic),
			Subnet:                    &armnetwork.Subnet{ID: to.Ptr(string(subnetID))},
		}
		if primary && usePublicIP {
			ipConfig.PublicIPAddress = &armnetwork.PublicIPAddress{
				ID: to.Ptr(publicIPAddressId),
			}
			// This happens on the first iteration only, before
			// nicDependsOn is attached to any NIC resource.
			nicDependsOn = append(nicDependsOn, publicIPAddressId)
		}
		ipConfigName := "primary"
		if i > 0 {
			ipConfigName = fmt.Sprintf("interface-%d", i)
		}
		nicName := vmName + "-" + ipConfigName
		nicId := fmt.Sprintf(`[resourceId('Microsoft.Network/networkInterfaces', '%s')]`, nicName)
		ipConfigurations := []*armnetwork.InterfaceIPConfiguration{{
			Name:       to.Ptr(ipConfigName),
			Properties: ipConfig,
		}}
		res = append(res, armtemplates.Resource{
			APIVersion: networkAPIVersion,
			Type:       "Microsoft.Network/networkInterfaces",
			Name:       nicName,
			Location:   env.location,
			Tags:       vmTags,
			Properties: &armnetwork.InterfacePropertiesFormat{
				IPConfigurations: ipConfigurations,
			},
			DependsOn: nicDependsOn,
		})
		vmDependsOn = append(vmDependsOn, nicId)

		nics = append(nics, &armcompute.NetworkInterfaceReference{
			ID: to.Ptr(nicId),
			Properties: &armcompute.NetworkInterfaceReferenceProperties{
				Primary: to.Ptr(primary),
			},
		})
	}

	// The VM resource itself; it depends on every NIC and, when created
	// here, on the availability set.
	res = append(res, armtemplates.Resource{
		APIVersion: computeAPIVersion,
		Type:       "Microsoft.Compute/virtualMachines",
		Name:       vmName,
		Location:   env.location,
		Tags:       vmTags,
		Properties: &armcompute.VirtualMachineProperties{
			HardwareProfile: &armcompute.HardwareProfile{
				VMSize: to.Ptr(armcompute.VirtualMachineSizeTypes(
					instanceSpec.InstanceType.Name,
				)),
			},
			StorageProfile: storageProfile,
			OSProfile:      osProfile,
			NetworkProfile: &armcompute.NetworkProfile{
				NetworkInterfaces: nics,
			},
			AvailabilitySet: availabilitySetSubResource,
		},
		DependsOn: vmDependsOn,
	})

	// On CentOS, we must add the CustomScript VM extension to run the
	// CustomData script.
	if seriesOS == ostype.CentOS {
		properties, err := vmExtensionProperties(seriesOS)
		if err != nil {
			return errors.Annotate(
				err, "creating virtual machine extension",
			)
		}
		res = append(res, armtemplates.Resource{
			APIVersion: computeAPIVersion,
			Type:       "Microsoft.Compute/virtualMachines/extensions",
			Name:       vmName + "/" + extensionName,
			Location:   env.location,
			Tags:       vmTags,
			Properties: properties,
			DependsOn:  []string{"Microsoft.Compute/virtualMachines/" + vmName},
		})
	}

	logger.Debugf("- creating virtual machine deployment in %q", env.resourceGroup)
	template := armtemplates.Template{Resources: res}
	if err := env.createDeployment(
		ctx,
		env.resourceGroup,
		vmName, // deployment name
		template,
	); err != nil {
		return errors.Trace(err)
	}
	return nil
}

// maxFaultDomains returns the maximum number of fault domains for the
// given location/region. The numbers were taken from
// https://docs.microsoft.com/en-au/azure/virtual-machines/windows/manage-availability,
// as at 31 August 2017.
func maxFaultDomains(location string) int32 {
	// From the page linked in the doc comment:
	// "The number of fault domains for managed availability sets varies
	// by region - either two or three per region."
	//
	// We record those that at the time of writing have 3. Anything
	// else has at least 2, so we just assume 2.
	switch location {
	case
		"eastus",
		"eastus2",
		"westus",
		"centralus",
		"northcentralus",
		"southcentralus",
		"northeurope",
		"westeurope":
		return 3
	}
	return 2
}

// waitCommonResourcesCreated waits for the "common" deployment to complete.
907 func (env *azureEnviron) waitCommonResourcesCreated(ctx context.ProviderCallContext) error { 908 env.mu.Lock() 909 defer env.mu.Unlock() 910 if env.commonResourcesCreated { 911 return nil 912 } 913 if _, err := env.waitCommonResourcesCreatedLocked(ctx); err != nil { 914 return errors.Trace(err) 915 } 916 env.commonResourcesCreated = true 917 return nil 918 } 919 920 type deploymentIncompleteError struct { 921 error 922 } 923 924 func (env *azureEnviron) waitCommonResourcesCreatedLocked(ctx context.ProviderCallContext) (*armresources.DeploymentExtended, error) { 925 // Release the lock while we're waiting, to avoid blocking others. 926 env.mu.Unlock() 927 defer env.mu.Lock() 928 929 deploy, err := env.deployClient() 930 if err != nil { 931 return nil, errors.Trace(err) 932 } 933 // Wait for up to 5 minutes, with a 5 second polling interval, 934 // for the "common" deployment to be in one of the terminal 935 // states. The deployment typically takes only around 30 seconds, 936 // but we allow for a longer duration to be defensive. 937 var deployment *armresources.DeploymentExtended 938 waitDeployment := func() error { 939 result, err := deploy.Get(ctx, env.resourceGroup, commonDeployment, nil) 940 if err != nil { 941 if errorutils.IsNotFoundError(err) { 942 // The controller model, and also models with bespoke 943 // networks, do not have a "common" deployment 944 // For controller models, common resources are created 945 // in the machine-0 deployment to keep bootstrap times optimal. 946 return nil 947 } 948 return errors.Annotate(err, "querying common deployment") 949 } 950 if result.Properties == nil { 951 return deploymentIncompleteError{errors.New("deployment incomplete")} 952 } 953 954 state := toValue(result.Properties.ProvisioningState) 955 if state == armresources.ProvisioningStateSucceeded { 956 // The deployment has succeeded, so the resources are 957 // ready for use. 
958 deployment = to.Ptr(result.DeploymentExtended) 959 return nil 960 } 961 err = errors.Errorf("%q resource deployment status is %q", commonDeployment, state) 962 switch state { 963 case armresources.ProvisioningStateCanceled, 964 armresources.ProvisioningStateFailed, 965 armresources.ProvisioningStateDeleted: 966 default: 967 err = deploymentIncompleteError{err} 968 } 969 return err 970 } 971 if err := retry.Call(retry.CallArgs{ 972 Func: waitDeployment, 973 IsFatalError: func(err error) bool { 974 _, ok := err.(deploymentIncompleteError) 975 return !ok 976 }, 977 Attempts: -1, 978 Delay: 5 * time.Second, 979 MaxDuration: 5 * time.Minute, 980 Clock: env.provider.config.RetryClock, 981 }); err != nil { 982 return nil, errors.Trace(err) 983 } 984 return deployment, nil 985 } 986 987 // createAvailabilitySet creates the availability set for a machine to use 988 // if it doesn't already exist, and returns the availability set's ID. The 989 // algorithm used for choosing the availability set is: 990 // - if the machine is a controller, use the availability set name 991 // "juju-controller"; 992 // - if the machine has units assigned, create an availability 993 // name with a name based on the value of the tags.JujuUnitsDeployed tag 994 // in vmTags, if it exists; 995 // - otherwise, do not assign the machine to an availability set 996 func availabilitySetName( 997 vmName string, 998 vmTags map[string]string, 999 controller bool, 1000 ) (string, error) { 1001 logger.Debugf("selecting availability set for %q", vmName) 1002 if controller { 1003 return controllerAvailabilitySet, nil 1004 } 1005 1006 // We'll have to create an availability set. Use the name of one of the 1007 // services assigned to the machine. 
1008 var availabilitySetName string 1009 if unitNames, ok := vmTags[tags.JujuUnitsDeployed]; ok { 1010 for _, unitName := range strings.Fields(unitNames) { 1011 if !names.IsValidUnit(unitName) { 1012 continue 1013 } 1014 serviceName, err := names.UnitApplication(unitName) 1015 if err != nil { 1016 return "", errors.Annotate(err, "getting application name") 1017 } 1018 availabilitySetName = serviceName 1019 break 1020 } 1021 } 1022 return availabilitySetName, nil 1023 } 1024 1025 // newStorageProfile creates the storage profile for a virtual machine, 1026 // based on the series and chosen instance spec. 1027 func newStorageProfile( 1028 vmName string, 1029 instanceSpec *instances.InstanceSpec, 1030 ) (*armcompute.StorageProfile, error) { 1031 logger.Debugf("creating storage profile for %q", vmName) 1032 1033 urnParts := strings.SplitN(instanceSpec.Image.Id, ":", 4) 1034 if len(urnParts) != 4 { 1035 return nil, errors.Errorf("invalid image ID %q", instanceSpec.Image.Id) 1036 } 1037 publisher := urnParts[0] 1038 offer := urnParts[1] 1039 sku := urnParts[2] 1040 vers := urnParts[3] 1041 1042 osDiskName := vmName 1043 osDiskSizeGB := mibToGB(instanceSpec.InstanceType.RootDisk) 1044 osDisk := &armcompute.OSDisk{ 1045 Name: to.Ptr(osDiskName), 1046 CreateOption: to.Ptr(armcompute.DiskCreateOptionTypesFromImage), 1047 Caching: to.Ptr(armcompute.CachingTypesReadWrite), 1048 DiskSizeGB: to.Ptr(int32(osDiskSizeGB)), 1049 ManagedDisk: &armcompute.ManagedDiskParameters{ 1050 StorageAccountType: to.Ptr(armcompute.StorageAccountTypesStandardLRS), 1051 }, 1052 } 1053 1054 return &armcompute.StorageProfile{ 1055 ImageReference: &armcompute.ImageReference{ 1056 Publisher: to.Ptr(publisher), 1057 Offer: to.Ptr(offer), 1058 SKU: to.Ptr(sku), 1059 Version: to.Ptr(vers), 1060 }, 1061 OSDisk: osDisk, 1062 }, nil 1063 } 1064 1065 func mibToGB(mib uint64) uint64 { 1066 b := float64(mib * 1024 * 1024) 1067 return uint64(b / (1000 * 1000 * 1000)) 1068 } 1069 1070 func newOSProfile( 1071 
vmName string, 1072 instanceConfig *instancecfg.InstanceConfig, 1073 generateSSHKey func(string) (string, string, error), 1074 ) (*armcompute.OSProfile, ostype.OSType, error) { 1075 logger.Debugf("creating OS profile for %q", vmName) 1076 1077 customData, err := providerinit.ComposeUserData(instanceConfig, nil, AzureRenderer{}) 1078 if err != nil { 1079 return nil, ostype.Unknown, errors.Annotate(err, "composing user data") 1080 } 1081 1082 osProfile := &armcompute.OSProfile{ 1083 ComputerName: to.Ptr(vmName), 1084 CustomData: to.Ptr(string(customData)), 1085 } 1086 1087 instOS := ostype.OSTypeForName(instanceConfig.Base.OS) 1088 if err != nil { 1089 return nil, ostype.Unknown, errors.Trace(err) 1090 } 1091 switch instOS { 1092 case ostype.Ubuntu, ostype.CentOS: 1093 // SSH keys are handled by custom data, but must also be 1094 // specified in order to forego providing a password, and 1095 // disable password authentication. 1096 authorizedKeys := instanceConfig.AuthorizedKeys 1097 if len(authorizedKeys) == 0 { 1098 // Azure requires that machines be provisioned with 1099 // either a password or at least one SSH key. We 1100 // generate a key-pair to make Azure happy, but throw 1101 // away the private key so that nobody will be able 1102 // to log into the machine directly unless the keys 1103 // are updated with one that Juju tracks. 
1104 _, public, err := generateSSHKey("") 1105 if err != nil { 1106 return nil, ostype.Unknown, errors.Trace(err) 1107 } 1108 authorizedKeys = public 1109 } 1110 1111 publicKeys := []*armcompute.SSHPublicKey{{ 1112 Path: to.Ptr("/home/ubuntu/.ssh/authorized_keys"), 1113 KeyData: to.Ptr(authorizedKeys), 1114 }} 1115 osProfile.AdminUsername = to.Ptr("ubuntu") 1116 osProfile.LinuxConfiguration = &armcompute.LinuxConfiguration{ 1117 DisablePasswordAuthentication: to.Ptr(true), 1118 SSH: &armcompute.SSHConfiguration{PublicKeys: publicKeys}, 1119 } 1120 default: 1121 return nil, ostype.Unknown, errors.NotSupportedf("%s", instOS) 1122 } 1123 return osProfile, instOS, nil 1124 } 1125 1126 // StopInstances is specified in the InstanceBroker interface. 1127 func (env *azureEnviron) StopInstances(ctx context.ProviderCallContext, ids ...instance.Id) error { 1128 if len(ids) == 0 { 1129 return nil 1130 } 1131 1132 // First up, cancel the deployments. Then we can identify the resources 1133 // that need to be deleted without racing with their creation. 1134 var wg sync.WaitGroup 1135 var existing int 1136 cancelResults := make([]error, len(ids)) 1137 for i, id := range ids { 1138 logger.Debugf("canceling deployment for instance %q", id) 1139 wg.Add(1) 1140 go func(i int, id instance.Id) { 1141 defer wg.Done() 1142 cancelResults[i] = errors.Annotatef( 1143 env.cancelDeployment(ctx, string(id)), 1144 "canceling deployment %q", id, 1145 ) 1146 }(i, id) 1147 } 1148 wg.Wait() 1149 for _, err := range cancelResults { 1150 if err == nil { 1151 existing++ 1152 } else if !errors.IsNotFound(err) { 1153 return err 1154 } 1155 } 1156 if existing == 0 { 1157 // None of the instances exist, so we can stop now. 1158 return nil 1159 } 1160 1161 // List network interfaces and public IP addresses. 
1162 instanceNics, err := env.instanceNetworkInterfaces( 1163 ctx, 1164 env.resourceGroup, 1165 ) 1166 if err != nil { 1167 return errors.Trace(err) 1168 } 1169 instancePips, err := env.instancePublicIPAddresses( 1170 ctx, 1171 env.resourceGroup, 1172 ) 1173 if err != nil { 1174 return errors.Trace(err) 1175 } 1176 1177 // Delete the deployments, virtual machines, and related armresources. 1178 deleteResults := make([]error, len(ids)) 1179 for i, id := range ids { 1180 if errors.IsNotFound(cancelResults[i]) { 1181 continue 1182 } 1183 // The deployment does not exist, so there's nothing more to do. 1184 logger.Debugf("deleting instance %q", id) 1185 wg.Add(1) 1186 go func(i int, id instance.Id) { 1187 defer wg.Done() 1188 err := env.deleteVirtualMachine( 1189 ctx, 1190 id, 1191 instanceNics[id], 1192 instancePips[id], 1193 ) 1194 deleteResults[i] = errors.Annotatef( 1195 err, "deleting instance %q", id, 1196 ) 1197 }(i, id) 1198 } 1199 wg.Wait() 1200 for _, err := range deleteResults { 1201 if err != nil && !errors.IsNotFound(err) { 1202 return errors.Trace(err) 1203 } 1204 } 1205 1206 return nil 1207 } 1208 1209 // cancelDeployment cancels a template deployment. 1210 func (env *azureEnviron) cancelDeployment(ctx context.ProviderCallContext, name string) error { 1211 logger.Debugf("- canceling deployment %q", name) 1212 deploy, err := env.deployClient() 1213 if err != nil { 1214 return errors.Trace(err) 1215 } 1216 _, err = deploy.Cancel(ctx, env.resourceGroup, name, nil) 1217 if err != nil { 1218 if errorutils.IsNotFoundError(err) { 1219 return errors.NewNotFound(err, fmt.Sprintf("deployment %q not found", name)) 1220 } 1221 // Deployments can only canceled while they're running. 
1222 if isDeployConflictError(err) { 1223 return nil 1224 } 1225 return errorutils.HandleCredentialError(errors.Annotatef(err, "canceling deployment %q", name), ctx) 1226 } 1227 return nil 1228 } 1229 1230 func isDeployConflictError(err error) bool { 1231 if errorutils.IsConflictError(err) { 1232 code := errorutils.ErrorCode(err) 1233 if code == serviceErrorCodeDeploymentCannotBeCancelled || 1234 code == serviceErrorCodeResourceGroupBeingDeleted { 1235 return true 1236 } 1237 } 1238 return false 1239 } 1240 1241 // deleteVirtualMachine deletes a virtual machine and all of the resources that 1242 // it owns, and any corresponding network security rules. 1243 func (env *azureEnviron) deleteVirtualMachine( 1244 ctx context.ProviderCallContext, 1245 instId instance.Id, 1246 networkInterfaces []*armnetwork.Interface, 1247 publicIPAddresses []*armnetwork.PublicIPAddress, 1248 ) error { 1249 vmName := string(instId) 1250 1251 // TODO(axw) delete resources concurrently. 1252 1253 compute, err := env.computeClient() 1254 if err != nil { 1255 return errors.Trace(err) 1256 } 1257 // The VM must be deleted first, to release the lock on its armresources. 1258 logger.Debugf("- deleting virtual machine (%s)", vmName) 1259 poller, err := compute.BeginDelete(ctx, env.resourceGroup, vmName, nil) 1260 if err == nil { 1261 _, err = poller.PollUntilDone(ctx, nil) 1262 } 1263 if err != nil { 1264 if errorutils.MaybeInvalidateCredential(err, ctx) || !errorutils.IsNotFoundError(err) { 1265 return errors.Annotate(err, "deleting virtual machine") 1266 } 1267 } 1268 // Delete the managed OS disk. 
1269 logger.Debugf("- deleting OS disk (%s)", vmName) 1270 disks, err := env.disksClient() 1271 if err != nil { 1272 return errors.Trace(err) 1273 } 1274 diskPoller, err := disks.BeginDelete(ctx, env.resourceGroup, vmName, nil) 1275 if err == nil { 1276 _, err = diskPoller.PollUntilDone(ctx, nil) 1277 } 1278 if err != nil { 1279 if errorutils.MaybeInvalidateCredential(err, ctx) || !errorutils.IsNotFoundError(err) { 1280 return errors.Annotate(err, "deleting OS disk") 1281 } 1282 } 1283 logger.Debugf("- deleting security rules (%s)", vmName) 1284 if err := deleteInstanceNetworkSecurityRules( 1285 ctx, 1286 env, instId, networkInterfaces, 1287 ); err != nil { 1288 return errors.Annotate(err, "deleting network security rules") 1289 } 1290 1291 logger.Debugf("- deleting network interfaces (%s)", vmName) 1292 interfaces, err := env.interfacesClient() 1293 if err != nil { 1294 return errors.Trace(err) 1295 } 1296 for _, nic := range networkInterfaces { 1297 nicName := toValue(nic.Name) 1298 logger.Tracef("deleting NIC %q", nicName) 1299 nicPoller, err := interfaces.BeginDelete(ctx, env.resourceGroup, nicName, nil) 1300 if err == nil { 1301 _, err = nicPoller.PollUntilDone(ctx, nil) 1302 } 1303 if err != nil { 1304 if errorutils.MaybeInvalidateCredential(err, ctx) || !errorutils.IsNotFoundError(err) { 1305 return errors.Annotate(err, "deleting NIC") 1306 } 1307 } 1308 } 1309 1310 logger.Debugf("- deleting public IPs (%s)", vmName) 1311 publicAddresses, err := env.publicAddressesClient() 1312 if err != nil { 1313 return errors.Trace(err) 1314 } 1315 for _, pip := range publicIPAddresses { 1316 pipName := toValue(pip.Name) 1317 logger.Tracef("deleting public IP %q", pipName) 1318 ipPoller, err := publicAddresses.BeginDelete(ctx, env.resourceGroup, pipName, nil) 1319 if err == nil { 1320 _, err = ipPoller.PollUntilDone(ctx, nil) 1321 } 1322 if err != nil { 1323 if errorutils.MaybeInvalidateCredential(err, ctx) || !errorutils.IsNotFoundError(err) { 1324 return 
errors.Annotate(err, "deleting public IP") 1325 } 1326 } 1327 } 1328 1329 // The deployment must be deleted last, or we risk leaking armresources. 1330 logger.Debugf("- deleting deployment (%s)", vmName) 1331 deploy, err := env.deployClient() 1332 if err != nil { 1333 return errors.Trace(err) 1334 } 1335 deploymentPoller, err := deploy.BeginDelete(ctx, env.resourceGroup, vmName, nil) 1336 if err == nil { 1337 _, err = deploymentPoller.PollUntilDone(ctx, nil) 1338 } 1339 if err != nil { 1340 ignoreError := isDeployConflictError(err) || errorutils.IsNotFoundError(err) 1341 if !ignoreError || errorutils.MaybeInvalidateCredential(err, ctx) { 1342 return errors.Annotate(err, "deleting deployment") 1343 } 1344 } 1345 return nil 1346 } 1347 1348 // AdoptResources is part of the Environ interface. 1349 func (env *azureEnviron) AdoptResources(ctx context.ProviderCallContext, controllerUUID string, _ version.Number) error { 1350 resourceGroups, err := env.resourceGroupsClient() 1351 if err != nil { 1352 return errors.Trace(err) 1353 } 1354 err = env.updateGroupControllerTag(ctx, resourceGroups, env.resourceGroup, controllerUUID) 1355 if err != nil { 1356 // If we can't update the group there's no point updating the 1357 // contained resources - the group will be killed if the 1358 // controller is destroyed, taking the other things with it. 1359 return errors.Trace(err) 1360 } 1361 1362 providers, err := env.providersClient() 1363 if err != nil { 1364 // If we can't update the group there's no point updating the 1365 // contained resources - the group will be killed if the 1366 // controller is destroyed, taking the other things with it. 
1367 return errors.Trace(err) 1368 } 1369 apiVersions, err := collectAPIVersions(ctx, providers) 1370 if err != nil { 1371 return errors.Trace(err) 1372 } 1373 1374 resources, err := env.resourcesClient() 1375 if err != nil { 1376 return errors.Trace(err) 1377 } 1378 var failed []string 1379 pager := resources.NewListByResourceGroupPager(env.resourceGroup, nil) 1380 for pager.More() { 1381 next, err := pager.NextPage(ctx) 1382 if err != nil { 1383 return errorutils.HandleCredentialError(errors.Annotate(err, "listing resources"), ctx) 1384 } 1385 for _, res := range next.Value { 1386 apiVersion := apiVersions[toValue(res.Type)] 1387 err := env.updateResourceControllerTag( 1388 ctx, 1389 resources, 1390 res, controllerUUID, apiVersion, 1391 ) 1392 if err != nil { 1393 name := toValue(res.Name) 1394 logger.Errorf("error updating resource tags for %q: %v", name, err) 1395 failed = append(failed, name) 1396 } 1397 } 1398 } 1399 if len(failed) > 0 { 1400 return errors.Errorf("failed to update controller for some resources: %v", failed) 1401 } 1402 1403 return nil 1404 } 1405 1406 func (env *azureEnviron) updateGroupControllerTag(ctx context.ProviderCallContext, client *armresources.ResourceGroupsClient, groupName, controllerUUID string) error { 1407 group, err := client.Get(ctx, groupName, nil) 1408 if err != nil { 1409 return errorutils.HandleCredentialError(errors.Trace(err), ctx) 1410 } 1411 1412 logger.Debugf( 1413 "updating resource group %s juju controller uuid to %s", 1414 toValue(group.Name), controllerUUID, 1415 ) 1416 group.Tags[tags.JujuController] = to.Ptr(controllerUUID) 1417 1418 // The Azure API forbids specifying ProvisioningState on the update. 
1419 if group.Properties != nil { 1420 (*group.Properties).ProvisioningState = nil 1421 } 1422 1423 _, err = client.CreateOrUpdate(ctx, groupName, group.ResourceGroup, nil) 1424 return errorutils.HandleCredentialError(errors.Annotatef(err, "updating controller for resource group %q", groupName), ctx) 1425 } 1426 1427 func (env *azureEnviron) updateResourceControllerTag( 1428 ctx context.ProviderCallContext, 1429 client *armresources.Client, 1430 stubResource *armresources.GenericResourceExpanded, 1431 controllerUUID string, 1432 apiVersion string, 1433 ) error { 1434 stubTags := toMap(stubResource.Tags) 1435 if stubTags[tags.JujuController] == controllerUUID { 1436 // No update needed. 1437 return nil 1438 } 1439 1440 // Need to get the resource individually to ensure that the 1441 // properties are populated. 1442 resource, err := client.GetByID(ctx, toValue(stubResource.ID), apiVersion, nil) 1443 if err != nil { 1444 return errorutils.HandleCredentialError(errors.Annotatef(err, "getting full resource %q", toValue(stubResource.Name)), ctx) 1445 } 1446 1447 logger.Debugf("updating %s juju controller UUID to %s", toValue(stubResource.ID), controllerUUID) 1448 if resource.Tags == nil { 1449 resource.Tags = make(map[string]*string) 1450 } 1451 resource.Tags[tags.JujuController] = to.Ptr(controllerUUID) 1452 _, err = client.BeginCreateOrUpdateByID( 1453 ctx, 1454 toValue(stubResource.ID), 1455 apiVersion, 1456 resource.GenericResource, 1457 nil, 1458 ) 1459 return errorutils.HandleCredentialError(errors.Annotatef(err, "updating controller for %q", toValue(resource.Name)), ctx) 1460 } 1461 1462 var ( 1463 runningInstStates = []armresources.ProvisioningState{ 1464 armresources.ProvisioningStateCreating, 1465 armresources.ProvisioningStateUpdating, 1466 armresources.ProvisioningStateSucceeded, 1467 } 1468 ) 1469 1470 // Instances is specified in the Environ interface. 
1471 func (env *azureEnviron) Instances(ctx context.ProviderCallContext, ids []instance.Id) ([]instances.Instance, error) { 1472 if len(ids) == 0 { 1473 return nil, nil 1474 } 1475 insts := make([]instances.Instance, len(ids)) 1476 // Make a series of requests to cope with eventual consistency. 1477 // Each request will attempt to add more instances to the requested set. 1478 err := retry.Call(retry.CallArgs{ 1479 Func: func() error { 1480 var need []instance.Id 1481 for i, inst := range insts { 1482 if inst == nil { 1483 need = append(need, ids[i]) 1484 } 1485 } 1486 return env.gatherInstances(ctx, need, insts, env.resourceGroup, true) 1487 }, 1488 IsFatalError: func(err error) bool { 1489 return err != environs.ErrPartialInstances 1490 }, 1491 Attempts: -1, 1492 Delay: 200 * time.Millisecond, 1493 MaxDuration: 5 * time.Second, 1494 Clock: env.provider.config.RetryClock, 1495 }) 1496 1497 if err == environs.ErrPartialInstances { 1498 for _, inst := range insts { 1499 if inst != nil { 1500 return insts, environs.ErrPartialInstances 1501 } 1502 } 1503 return nil, environs.ErrNoInstances 1504 } 1505 if err != nil { 1506 return nil, errors.Trace(err) 1507 } 1508 return insts, nil 1509 } 1510 1511 // AllInstances is specified in the InstanceBroker interface. 1512 func (env *azureEnviron) AllInstances(ctx context.ProviderCallContext) ([]instances.Instance, error) { 1513 return env.allInstances(ctx, env.resourceGroup, true, "") 1514 } 1515 1516 // AllRunningInstances is specified in the InstanceBroker interface. 1517 func (env *azureEnviron) AllRunningInstances(ctx context.ProviderCallContext) ([]instances.Instance, error) { 1518 return env.allInstances(ctx, env.resourceGroup, true, "", runningInstStates...) 1519 } 1520 1521 // gatherInstances tries to get information on each instance id 1522 // whose corresponding insts slot is nil. 1523 // This function returns environs.ErrPartialInstances if the 1524 // insts slice has not been completely filled. 
1525 func (env *azureEnviron) gatherInstances( 1526 ctx context.ProviderCallContext, 1527 ids []instance.Id, 1528 insts []instances.Instance, 1529 resourceGroup string, 1530 refreshAddresses bool, 1531 instStates ...armresources.ProvisioningState, 1532 ) error { 1533 allInst, err := env.allInstances(ctx, resourceGroup, refreshAddresses, "", instStates...) 1534 if err != nil { 1535 return errors.Trace(err) 1536 } 1537 1538 numFound := 0 1539 // For each requested id, add it to the returned instances 1540 // if we find it in the latest queried cloud instances. 1541 for i, id := range ids { 1542 if insts[i] != nil { 1543 numFound++ 1544 continue 1545 } 1546 for _, inst := range allInst { 1547 if inst.Id() != id { 1548 continue 1549 } 1550 insts[i] = inst 1551 numFound++ 1552 } 1553 } 1554 if numFound < len(ids) { 1555 return environs.ErrPartialInstances 1556 } 1557 return nil 1558 } 1559 1560 // allInstances returns all instances in the environment 1561 // with one of the specified instance states. 1562 // If no instance states are specified, then return all instances. 1563 func (env *azureEnviron) allInstances( 1564 ctx context.ProviderCallContext, 1565 resourceGroup string, 1566 refreshAddresses bool, 1567 controllerUUID string, 1568 instStates ...armresources.ProvisioningState, 1569 ) ([]instances.Instance, error) { 1570 // Instances may be queued for deployment but provisioning has not yet started. 1571 queued, err := env.allQueuedInstances(ctx, resourceGroup, controllerUUID != "") 1572 if err != nil { 1573 return nil, errors.Trace(err) 1574 } 1575 provisioned, err := env.allProvisionedInstances(ctx, resourceGroup, controllerUUID, instStates...) 1576 if err != nil { 1577 return nil, errors.Trace(err) 1578 } 1579 1580 // Any provisioned or provisioning instances take precedence 1581 // over any entries in the queued slice. 
1582 seenInst := set.NewStrings() 1583 azureInstances := provisioned 1584 for _, p := range provisioned { 1585 seenInst.Add(string(p.Id())) 1586 } 1587 for _, q := range queued { 1588 if seenInst.Contains(string(q.Id())) { 1589 continue 1590 } 1591 azureInstances = append(azureInstances, q) 1592 } 1593 1594 // Get the instance addresses if needed. 1595 if len(azureInstances) > 0 && refreshAddresses { 1596 if err := env.setInstanceAddresses( 1597 ctx, 1598 resourceGroup, 1599 azureInstances, 1600 ); err != nil { 1601 return nil, errors.Trace(err) 1602 } 1603 } 1604 1605 var result []instances.Instance 1606 for _, inst := range azureInstances { 1607 result = append(result, inst) 1608 } 1609 sort.Slice(result, func(i, j int) bool { 1610 return result[i].Id() < result[j].Id() 1611 }) 1612 return result, nil 1613 } 1614 1615 // allQueuedInstances returns any pending or failed machine deployments 1616 // in the given resource group. 1617 func (env *azureEnviron) allQueuedInstances( 1618 ctx context.ProviderCallContext, 1619 resourceGroup string, 1620 controllerOnly bool, 1621 ) ([]*azureInstance, error) { 1622 deploy, err := env.deployClient() 1623 if err != nil { 1624 return nil, errors.Trace(err) 1625 } 1626 var azureInstances []*azureInstance 1627 pager := deploy.NewListByResourceGroupPager(resourceGroup, nil) 1628 for pager.More() { 1629 next, err := pager.NextPage(ctx) 1630 if err != nil { 1631 if errorutils.IsNotFoundError(err) { 1632 // This will occur if the resource group does not 1633 // exist, e.g. in a fresh hosted environment. 
1634 return nil, nil 1635 } 1636 return nil, errorutils.HandleCredentialError(errors.Trace(err), ctx) 1637 } 1638 for _, deployment := range next.Value { 1639 deployProvisioningState := armresources.ProvisioningStateNotSpecified 1640 deployError := "Failed" 1641 if deployment.Properties != nil { 1642 deployProvisioningState = toValue(deployment.Properties.ProvisioningState) 1643 deployError = string(deployProvisioningState) 1644 if deployment.Properties.Error != nil { 1645 deployError = toValue(deployment.Properties.Error.Message) 1646 if deployment.Properties.Error.Details != nil && len(deployment.Properties.Error.Details) > 0 { 1647 deployError = toValue((deployment.Properties.Error.Details)[0].Message) 1648 } 1649 } 1650 } 1651 switch deployProvisioningState { 1652 case armresources.ProvisioningStateAccepted, 1653 armresources.ProvisioningStateCreating, 1654 armresources.ProvisioningStateRunning, 1655 armresources.ProvisioningStateFailed, 1656 armresources.ProvisioningStateCanceled, 1657 armresources.ProvisioningStateNotSpecified: 1658 default: 1659 continue 1660 } 1661 name := toValue(deployment.Name) 1662 if _, err := names.ParseMachineTag(name); err != nil { 1663 // Deployments we create for Juju machines are named 1664 // with the machine tag. We also create a "common" 1665 // deployment, so this will exclude that VM and any 1666 // other stray deployment armresources. 
1667 continue 1668 } 1669 if deployment.Properties == nil || deployment.Properties.Dependencies == nil { 1670 continue 1671 } 1672 if controllerOnly && !isControllerDeployment(deployment) { 1673 continue 1674 } 1675 if len(deployment.Tags) == 0 { 1676 continue 1677 } 1678 if toValue(deployment.Tags[tags.JujuModel]) != env.Config().UUID() { 1679 continue 1680 } 1681 provisioningState := armresources.ProvisioningStateCreating 1682 switch deployProvisioningState { 1683 case armresources.ProvisioningStateFailed, 1684 armresources.ProvisioningStateCanceled: 1685 provisioningState = armresources.ProvisioningStateFailed 1686 } 1687 inst := &azureInstance{ 1688 vmName: name, 1689 provisioningState: provisioningState, 1690 provisioningError: deployError, 1691 env: env, 1692 } 1693 azureInstances = append(azureInstances, inst) 1694 } 1695 } 1696 return azureInstances, nil 1697 } 1698 1699 func isControllerDeployment(deployment *armresources.DeploymentExtended) bool { 1700 if deployment.Properties == nil { 1701 return false 1702 } 1703 for _, d := range deployment.Properties.Dependencies { 1704 if d.DependsOn == nil { 1705 continue 1706 } 1707 if toValue(d.ResourceType) != "Microsoft.Compute/virtualMachines" { 1708 continue 1709 } 1710 for _, on := range d.DependsOn { 1711 if toValue(on.ResourceType) != "Microsoft.Compute/availabilitySets" { 1712 continue 1713 } 1714 if toValue(on.ResourceName) == controllerAvailabilitySet { 1715 return true 1716 } 1717 } 1718 } 1719 return false 1720 } 1721 1722 // allProvisionedInstances returns all of the instances 1723 // in the given resource group. 
1724 func (env *azureEnviron) allProvisionedInstances( 1725 ctx context.ProviderCallContext, 1726 resourceGroup string, 1727 controllerUUID string, 1728 instStates ...armresources.ProvisioningState, 1729 ) ([]*azureInstance, error) { 1730 compute, err := env.computeClient() 1731 if err != nil { 1732 return nil, errors.Trace(err) 1733 } 1734 1735 var azureInstances []*azureInstance 1736 pager := compute.NewListPager(resourceGroup, nil) 1737 for pager.More() { 1738 next, err := pager.NextPage(ctx) 1739 if err != nil { 1740 if errorutils.IsNotFoundError(err) { 1741 // This will occur if the resource group does not 1742 // exist, e.g. in a fresh hosted environment. 1743 return nil, nil 1744 } 1745 return nil, errorutils.HandleCredentialError(errors.Trace(err), ctx) 1746 } 1747 for _, vm := range next.Value { 1748 name := toValue(vm.Name) 1749 provisioningState := armresources.ProvisioningStateNotSpecified 1750 if vm.Properties != nil { 1751 provisioningState = armresources.ProvisioningState(toValue(vm.Properties.ProvisioningState)) 1752 } 1753 if len(instStates) > 0 { 1754 haveState := false 1755 for _, wantState := range instStates { 1756 if provisioningState == wantState { 1757 haveState = true 1758 break 1759 } 1760 } 1761 if !haveState { 1762 continue 1763 } 1764 } 1765 if !isControllerInstance(vm, controllerUUID) { 1766 continue 1767 } 1768 if len(vm.Tags) == 0 { 1769 continue 1770 } 1771 if toValue(vm.Tags[tags.JujuModel]) != env.Config().UUID() { 1772 continue 1773 } 1774 inst := &azureInstance{ 1775 vmName: name, 1776 provisioningState: provisioningState, 1777 env: env, 1778 } 1779 azureInstances = append(azureInstances, inst) 1780 } 1781 } 1782 return azureInstances, nil 1783 } 1784 1785 func isControllerInstance(vm *armcompute.VirtualMachine, controllerUUID string) bool { 1786 if controllerUUID == "" { 1787 return true 1788 } 1789 vmTags := vm.Tags 1790 if v, ok := vmTags[tags.JujuIsController]; !ok || toValue(v) != "true" { 1791 return false 1792 } 1793 if 
v, ok := vmTags[tags.JujuController]; !ok || toValue(v) != controllerUUID {
		return false
	}
	return true
}

// Destroy is specified in the Environ interface.
//
// It removes the model's resource group (or, for a user-specified
// resource group, the model's resources inside it; see
// deleteResourceGroup).
func (env *azureEnviron) Destroy(ctx context.ProviderCallContext) error {
	logger.Debugf("destroying model %q", env.modelName)
	logger.Debugf("- deleting resource group %q", env.resourceGroup)
	if err := env.deleteResourceGroup(ctx, env.resourceGroup); err != nil {
		return errors.Trace(err)
	}
	// Resource groups are self-contained and fully encompass
	// all environ resources. Once you delete the group, there
	// is nothing else to do.
	return nil
}

// DestroyController is specified in the Environ interface.
//
// It deletes every resource group tagged with the given controller UUID.
func (env *azureEnviron) DestroyController(ctx context.ProviderCallContext, controllerUUID string) error {
	logger.Debugf("destroying model %q", env.modelName)
	logger.Debugf("deleting resource groups")
	if err := env.deleteControllerManagedResourceGroups(ctx, controllerUUID); err != nil {
		return errors.Trace(err)
	}
	// Resource groups are self-contained and fully encompass
	// all environ resources. Once you delete the group, there
	// is nothing else to do.
	return nil
}

// deleteControllerManagedResourceGroups lists all resource groups whose
// tags.JujuController tag equals controllerUUID and deletes them in
// parallel. If exactly one deletion fails, that error is returned; if
// several fail, a single error combining their messages is returned.
func (env *azureEnviron) deleteControllerManagedResourceGroups(ctx context.ProviderCallContext, controllerUUID string) error {
	resourceGroups, err := env.resourceGroupsClient()
	if err != nil {
		return errors.Trace(err)
	}
	filter := fmt.Sprintf(
		"tagName eq '%s' and tagValue eq '%s'",
		tags.JujuController, controllerUUID,
	)
	pager := resourceGroups.NewListPager(&armresources.ResourceGroupsClientListOptions{
		Filter: to.Ptr(filter),
	})
	var groupNames []*string
	for pager.More() {
		next, err := pager.NextPage(ctx)
		if err != nil {
			return errorutils.HandleCredentialError(errors.Annotate(err, "listing resource groups"), ctx)
		}
		// Walk all the pages of results so we can get a total list of groups to remove.
		for _, result := range next.Value {
			groupNames = append(groupNames, result.Name)
		}
	}
	// Deleting groups can take a long time, so make sure they are
	// deleted in parallel.
	var wg sync.WaitGroup
	// Each goroutine writes only to its own slot of errs, so no
	// additional synchronisation is required.
	errs := make([]error, len(groupNames))
	for i, name := range groupNames {
		groupName := toValue(name)
		logger.Debugf("  - deleting resource group %q", groupName)
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			if err := env.deleteResourceGroup(ctx, groupName); err != nil {
				errs[i] = errors.Annotatef(
					err, "deleting resource group %q", groupName,
				)
			}
		}(i)
	}
	wg.Wait()

	// If there is just one error, return it. If there are multiple,
	// then combine their messages.
	var nonNilErrs []error
	for _, err := range errs {
		if err != nil {
			nonNilErrs = append(nonNilErrs, err)
		}
	}
	switch len(nonNilErrs) {
	case 0:
		return nil
	case 1:
		return nonNilErrs[0]
	}
	combined := make([]string, len(nonNilErrs))
	for i, err := range nonNilErrs {
		combined[i] = err.Error()
	}
	return errors.New(strings.Join(combined, "; "))
}

// deleteResourceGroup deletes the named resource group and waits for the
// deletion to complete. A "not found" response is treated as success.
//
// When the model is configured with a user-specified resource group name,
// the group itself is left in place and only the model's resources inside
// it are deleted.
func (env *azureEnviron) deleteResourceGroup(ctx context.ProviderCallContext, resourceGroup string) error {
	// For user specified, existing resource groups, delete the contents, not the group.
	if env.config.resourceGroupName != "" {
		return env.deleteResourcesInGroup(ctx, resourceGroup)
	}
	resourceGroups, err := env.resourceGroupsClient()
	if err != nil {
		return errors.Trace(err)
	}
	poller, err := resourceGroups.BeginDelete(ctx, resourceGroup, nil)
	if err == nil {
		_, err = poller.PollUntilDone(ctx, nil)
	}
	if err != nil {
		// MaybeInvalidateCredential is evaluated first so that it always
		// gets to inspect the error, even when it is a "not found".
		if errorutils.MaybeInvalidateCredential(err, ctx) || !errorutils.IsNotFoundError(err) {
			return errors.Annotatef(err, "deleting resource group %q", resourceGroup)
		}
	}
	return nil
}

// deleteResourcesInGroup deletes the resources in resourceGroup that belong
// to this model, leaving the group itself in place.
//
// Virtual machines are removed via StopInstances, key vaults via
// deleteVault, and everything else via deleteResources, which is retried
// (up to 10 passes) for resources that are reported as still in use.
// Any error is passed through errorutils.HandleCredentialError on return.
func (env *azureEnviron) deleteResourcesInGroup(ctx context.ProviderCallContext, resourceGroup string) (err error) {
	logger.Debugf("deleting all resources in %s", resourceGroup)

	defer func() {
		err = errorutils.HandleCredentialError(err, ctx)
	}()

	// Find all the resources tagged as belonging to this model.
	filter := fmt.Sprintf("tagName eq '%s' and tagValue eq '%s'", tags.JujuModel, env.config.UUID())
	resourceItems, err := env.getModelResources(ctx, resourceGroup, filter)
	if err != nil {
		return errors.Trace(err)
	}

	// Older APIs can ignore the filter above, so query the hard way just in case.
	// An empty filter makes getModelResources list everything in the group
	// and check each resource's model tag individually; repeating the same
	// filtered query would just return the same (empty) result.
	if len(resourceItems) == 0 {
		resourceItems, err = env.getModelResources(ctx, resourceGroup, "")
		if err != nil {
			return errors.Trace(err)
		}
	}

	// These will be deleted as part of stopping the instance below.
	machineResourceTypes := set.NewStrings(
		"Microsoft.Compute/virtualMachines",
		"Microsoft.Compute/disks",
		"Microsoft.Network/publicIPAddresses",
		"Microsoft.Network/networkInterfaces",
	)

	var (
		instIds        []instance.Id
		vaultNames     []string
		otherResources []*armresources.GenericResourceExpanded
	)
	for _, r := range resourceItems {
		rType := toValue(r.Type)
		logger.Debugf("resource to delete: %v (%v)", toValue(r.Name), rType)
		// Vault resources are handled by a separate client.
		if rType == "Microsoft.KeyVault/vaults" {
			vaultNames = append(vaultNames, toValue(r.Name))
			continue
		}
		if rType == "Microsoft.Compute/virtualMachines" {
			instIds = append(instIds, instance.Id(toValue(r.Name)))
			continue
		}
		if !machineResourceTypes.Contains(rType) {
			otherResources = append(otherResources, r)
		}
	}

	// Stopping instances will also remove most of their dependent resources.
	err = env.StopInstances(ctx, instIds...)
	if err != nil {
		return errors.Annotatef(err, "deleting machine instances %q", instIds)
	}

	// Loop until all remaining resources are deleted.
	// For safety, add an upper retry limit; in reality, this will never be hit.
	remainingResources := otherResources
	retries := 0
	for len(remainingResources) > 0 && retries < 10 {
		remainingResources, err = env.deleteResources(ctx, remainingResources)
		if err != nil {
			return errors.Trace(err)
		}
		retries++
	}
	if len(remainingResources) > 0 {
		logger.Warningf("could not delete all Azure resources, remaining: %v", remainingResources)
	}

	// Lastly delete the vault resources.
	for _, vaultName := range vaultNames {
		if err := env.deleteVault(ctx, vaultName); err != nil {
			return errors.Trace(err)
		}
	}
	return nil
}

// getModelResources lists the resources in resourceGroup, walking every
// page of results.
//
// If modelFilter is non-empty it is passed as the list Filter option and
// the results are returned as-is. If modelFilter is empty, no filter is
// sent; instead each listed resource is fetched by ID and kept only when
// its tags.JujuModel tag equals this model's UUID.
func (env *azureEnviron) getModelResources(sdkCtx stdcontext.Context, resourceGroup, modelFilter string) ([]*armresources.GenericResourceExpanded, error) {
	resources, err := env.resourcesClient()
	if err != nil {
		return nil, errors.Trace(err)
	}
	// Only send a filter when there is one; nil options list everything.
	var listOptions *armresources.ClientListByResourceGroupOptions
	if modelFilter != "" {
		listOptions = &armresources.ClientListByResourceGroupOptions{
			Filter: to.Ptr(modelFilter),
		}
	}
	var resourceItems []*armresources.GenericResourceExpanded
	pager := resources.NewListByResourceGroupPager(resourceGroup, listOptions)
	for pager.More() {
		next, err := pager.NextPage(sdkCtx)
		if err != nil {
			return nil, errors.Annotate(err, "listing resources to delete")
		}
		for _, res := range next.Value {
			// If no modelFilter specified, we need to check that the resource
			// belongs to this model.
			if modelFilter == "" {
				// NOTE(review): computeAPIVersion is used for every resource
				// type here - confirm it is accepted for non-compute resources.
				fullRes, err := resources.GetByID(sdkCtx, toValue(res.ID), computeAPIVersion, nil)
				if err != nil {
					return nil, errors.Trace(err)
				}
				if env.config.UUID() != toValue(fullRes.Tags[tags.JujuModel]) {
					continue
				}
			}
			resourceItems = append(resourceItems, res)
		}
	}
	return resourceItems, nil
}

// deleteResources deletes the specified resources, returning any that
// cannot be deleted because they are in use.
2025 func (env *azureEnviron) deleteResources(sdkCtx stdcontext.Context, toDelete []*armresources.GenericResourceExpanded) ([]*armresources.GenericResourceExpanded, error) { 2026 logger.Debugf("deleting %d resources", len(toDelete)) 2027 2028 var remainingResources []*armresources.GenericResourceExpanded 2029 var wg sync.WaitGroup 2030 deleteResults := make([]error, len(toDelete)) 2031 for i, res := range toDelete { 2032 id := toValue(res.ID) 2033 logger.Debugf("- deleting resource %q", id) 2034 wg.Add(1) 2035 go func(i int, id string) { 2036 defer wg.Done() 2037 resources, err := env.resourcesClient() 2038 if err != nil { 2039 deleteResults[i] = err 2040 return 2041 } 2042 poller, err := resources.BeginDeleteByID(sdkCtx, id, computeAPIVersion, nil) 2043 if err == nil { 2044 _, err = poller.PollUntilDone(sdkCtx, nil) 2045 } 2046 if err != nil { 2047 if errorutils.IsNotFoundError(err) { 2048 return 2049 } 2050 // If the resource is in use, don't error, just queue it up for another pass. 2051 if strings.HasPrefix(errorutils.ErrorCode(err), "InUse") { 2052 remainingResources = append(remainingResources, toDelete[i]) 2053 } else { 2054 deleteResults[i] = errors.Annotatef(err, "deleting resource %q: %v", id, err) 2055 } 2056 return 2057 } 2058 }(i, id) 2059 } 2060 wg.Wait() 2061 2062 var errStrings []string 2063 for i, err := range deleteResults { 2064 if err != nil && !errors.IsNotFound(err) { 2065 msg := fmt.Sprintf("error deleting resource %q: %#v", toValue(toDelete[i].ID), err) 2066 errStrings = append(errStrings, msg) 2067 } 2068 } 2069 if len(errStrings) > 0 { 2070 return nil, errors.Annotate(errors.New(strings.Join(errStrings, "\n")), "deleting resources") 2071 } 2072 return remainingResources, nil 2073 } 2074 2075 // Provider is specified in the Environ interface. 2076 func (env *azureEnviron) Provider() environs.EnvironProvider { 2077 return env.provider 2078 } 2079 2080 // resourceGroupName returns the name of the model's resource group to use. 
2081 // It may be that a legacy group name is already in use, so use that if present. 2082 func (env *azureEnviron) resourceGroupName(ctx stdcontext.Context, modelTag names.ModelTag, modelName string) (string, error) { 2083 resourceGroups, err := env.resourceGroupsClient() 2084 if err != nil { 2085 return "", errors.Trace(err) 2086 } 2087 // First look for a resource group name with the full model UUID. 2088 legacyName := legacyResourceGroupName(modelTag, modelName) 2089 g, err := resourceGroups.Get(ctx, legacyName, nil) 2090 if err == nil { 2091 logger.Debugf("using existing legacy resource group %q for model %q", legacyName, modelName) 2092 return legacyName, nil 2093 } 2094 if !errorutils.IsNotFoundError(err) { 2095 return "", errors.Trace(err) 2096 } 2097 2098 logger.Debugf("legacy resource group name doesn't exist, using short name") 2099 resourceGroup := resourceGroupName(modelTag, modelName) 2100 g, err = resourceGroups.Get(ctx, resourceGroup, nil) 2101 if err == nil { 2102 mTag, ok := g.Tags[tags.JujuModel] 2103 if !ok || toValue(mTag) != modelTag.Id() { 2104 // This should never happen in practice - combination of model name and first 8 2105 // digits of UUID should be unique. 2106 return "", errors.Errorf("unexpected model UUID on resource group %q; expected %q, got %q", resourceGroup, modelTag.Id(), toValue(mTag)) 2107 } 2108 return resourceGroup, nil 2109 } 2110 if errorutils.IsNotFoundError(err) { 2111 return resourceGroup, nil 2112 } 2113 return "", errors.Trace(err) 2114 } 2115 2116 // resourceGroupName returns the name of the environment's resource group. 2117 func legacyResourceGroupName(modelTag names.ModelTag, modelName string) string { 2118 return fmt.Sprintf("juju-%s-%s", modelName, resourceName(modelTag)) 2119 } 2120 2121 // resourceGroupName returns the name of the environment's resource group. 
2122 func resourceGroupName(modelTag names.ModelTag, modelName string) string { 2123 // The first chunk of the UUID string plus model name should be good enough. 2124 return fmt.Sprintf("juju-%s-%s", modelName, modelTag.Id()[:8]) 2125 } 2126 2127 // resourceName returns the string to use for a resource's Name tag, 2128 // to help users identify Juju-managed resources in the Azure portal. 2129 // 2130 // Since resources are grouped under resource groups, we just use the 2131 // tag. 2132 func resourceName(tag names.Tag) string { 2133 return tag.String() 2134 } 2135 2136 // getInstanceTypes gets the instance types available for the configured 2137 // location, keyed by name. 2138 func (env *azureEnviron) getInstanceTypes(ctx context.ProviderCallContext) (map[string]instances.InstanceType, error) { 2139 env.mu.Lock() 2140 defer env.mu.Unlock() 2141 instanceTypes, err := env.getInstanceTypesLocked(ctx) 2142 if err != nil { 2143 return nil, errors.Annotate(err, "getting instance types") 2144 } 2145 return instanceTypes, nil 2146 } 2147 2148 // getInstanceTypesLocked returns the instance types for Azure, by listing the 2149 // role sizes available to the subscription. 
2150 func (env *azureEnviron) getInstanceTypesLocked(ctx context.ProviderCallContext) (map[string]instances.InstanceType, error) { 2151 if env.instanceTypes != nil { 2152 return env.instanceTypes, nil 2153 } 2154 2155 skus, err := env.resourceSKUsClient() 2156 if err != nil { 2157 return nil, errors.Trace(err) 2158 } 2159 instanceTypes := make(map[string]instances.InstanceType) 2160 pager := skus.NewListPager(nil) 2161 for pager.More() { 2162 next, err := pager.NextPage(ctx) 2163 if err != nil { 2164 return nil, errorutils.HandleCredentialError(errors.Annotate(err, "listing VM sizes"), ctx) 2165 } 2166 nextResource: 2167 for _, resource := range next.Value { 2168 if resource.ResourceType == nil || *resource.ResourceType != "virtualMachines" { 2169 continue 2170 } 2171 for _, r := range resource.Restrictions { 2172 if toValue(r.ReasonCode) == armcompute.ResourceSKURestrictionsReasonCodeNotAvailableForSubscription { 2173 continue nextResource 2174 } 2175 } 2176 locationOk := false 2177 if resource.Locations != nil { 2178 for _, loc := range resource.Locations { 2179 if strings.EqualFold(toValue(loc), env.location) { 2180 locationOk = true 2181 break 2182 } 2183 } 2184 } 2185 if !locationOk { 2186 continue 2187 } 2188 var ( 2189 cores *int32 2190 mem *int32 2191 rootDisk *int32 2192 ) 2193 for _, capability := range resource.Capabilities { 2194 if capability.Name == nil || capability.Value == nil { 2195 continue 2196 } 2197 switch toValue(capability.Name) { 2198 case "MemoryGB": 2199 memValue, _ := strconv.ParseFloat(*capability.Value, 32) 2200 mem = to.Ptr(int32(1024 * memValue)) 2201 case "vCPUsAvailable", "vCPUs": 2202 coresValue, _ := strconv.Atoi(*capability.Value) 2203 cores = to.Ptr(int32(coresValue)) 2204 case "OSVhdSizeMB": 2205 rootDiskValue, _ := strconv.Atoi(*capability.Value) 2206 rootDisk = to.Ptr(int32(rootDiskValue)) 2207 } 2208 } 2209 instanceType := newInstanceType(armcompute.VirtualMachineSize{ 2210 Name: resource.Name, 2211 NumberOfCores: cores, 
2212 OSDiskSizeInMB: rootDisk, 2213 MemoryInMB: mem, 2214 }) 2215 instanceTypes[instanceType.Name] = instanceType 2216 // Create aliases for standard role sizes. 2217 if strings.HasPrefix(instanceType.Name, "Standard_") { 2218 instanceTypes[instanceType.Name[len("Standard_"):]] = instanceType 2219 } 2220 } 2221 } 2222 env.instanceTypes = instanceTypes 2223 return instanceTypes, nil 2224 } 2225 2226 // Region is specified in the HasRegion interface. 2227 func (env *azureEnviron) Region() (simplestreams.CloudSpec, error) { 2228 return simplestreams.CloudSpec{ 2229 Region: env.cloud.Region, 2230 Endpoint: env.cloud.Endpoint, 2231 }, nil 2232 }