github.com/juju/juju@v0.0.0-20240430160146-1752b71fcf00/provider/azure/environ.go (about)

     1  // Copyright 2015 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package azure
     5  
     6  import (
     7  	stdcontext "context"
     8  	"fmt"
     9  	"net/url"
    10  	"sort"
    11  	"strconv"
    12  	"strings"
    13  	"sync"
    14  	"time"
    15  
    16  	"github.com/Azure/azure-sdk-for-go/sdk/azcore"
    17  	"github.com/Azure/azure-sdk-for-go/sdk/azcore/arm"
    18  	"github.com/Azure/azure-sdk-for-go/sdk/azcore/policy"
    19  	"github.com/Azure/azure-sdk-for-go/sdk/azcore/to"
    20  	"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v2"
    21  	"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork"
    22  	"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources"
    23  	"github.com/juju/collections/set"
    24  	"github.com/juju/errors"
    25  	"github.com/juju/names/v5"
    26  	"github.com/juju/retry"
    27  	"github.com/juju/version/v2"
    28  
    29  	"github.com/juju/juju/agent"
    30  	"github.com/juju/juju/cloudconfig/instancecfg"
    31  	"github.com/juju/juju/cloudconfig/providerinit"
    32  	"github.com/juju/juju/core/arch"
    33  	"github.com/juju/juju/core/constraints"
    34  	"github.com/juju/juju/core/instance"
    35  	"github.com/juju/juju/core/os/ostype"
    36  	"github.com/juju/juju/environs"
    37  	environscloudspec "github.com/juju/juju/environs/cloudspec"
    38  	"github.com/juju/juju/environs/config"
    39  	"github.com/juju/juju/environs/context"
    40  	"github.com/juju/juju/environs/instances"
    41  	"github.com/juju/juju/environs/simplestreams"
    42  	"github.com/juju/juju/environs/tags"
    43  	"github.com/juju/juju/provider/azure/internal/armtemplates"
    44  	"github.com/juju/juju/provider/azure/internal/azureauth"
    45  	"github.com/juju/juju/provider/azure/internal/errorutils"
    46  	"github.com/juju/juju/provider/azure/internal/tracing"
    47  	"github.com/juju/juju/provider/common"
    48  	"github.com/juju/juju/tools"
    49  	jujuversion "github.com/juju/juju/version"
    50  )
    51  
const (
	// jujuMachineNameTag is the resource tag key under which the VM name
	// (in which the Juju machine name is encoded) is recorded.
	jujuMachineNameTag = tags.JujuTagPrefix + "machine-name"

	// minRootDiskSize is the minimum root disk size Azure
	// accepts for a VM's OS disk, expressed in MiB.
	// It will be used if none is specified by the user.
	minRootDiskSize = 30 * 1024 // 30 GiB

	// serviceErrorCodeDeploymentCannotBeCancelled is the error code for
	// service errors in response to an attempt to cancel a deployment
	// that cannot be cancelled.
	serviceErrorCodeDeploymentCannotBeCancelled = "DeploymentCannotBeCancelled"

	// serviceErrorCodeResourceGroupBeingDeleted is the error code for
	// service errors in response to an attempt to cancel a deployment
	// whose resource group has already started to be deleted.
	serviceErrorCodeResourceGroupBeingDeleted = "ResourceGroupBeingDeleted"

	// controllerAvailabilitySet is the name of the availability set
	// used for controller machines.
	controllerAvailabilitySet = "juju-controller"

	// commonDeployment is used to create resources common to all models.
	commonDeployment = "common"

	// computeAPIVersion is the API version set on Microsoft.Compute
	// resources added to ARM templates.
	computeAPIVersion = "2021-11-01"
	// networkAPIVersion is the API version set on Microsoft.Network
	// resources added to ARM templates.
	networkAPIVersion = "2018-08-01"
)
    80  
// azureEnviron is the Azure provider's implementation of environs.Environ.
// It carries the cloud spec, the identifiers derived from it, and the
// mutable model configuration.
type azureEnviron struct {
	environs.NoSpaceDiscoveryEnviron

	// provider is the azureEnvironProvider used to open this environment.
	provider *azureEnvironProvider

	// cloud defines the cloud configuration for this environment.
	cloud environscloudspec.CloudSpec

	// location is the canonical location name. Use this instead
	// of cloud.Region in API calls.
	location string

	// subscriptionId is the Azure account subscription ID.
	subscriptionId string

	// tenantId is the Azure account tenant ID.
	tenantId string

	// storageEndpoint is the Azure storage endpoint. This is the host
	// portion of the storage endpoint URL only; use this instead of
	// cloud.StorageEndpoint in API calls.
	storageEndpoint string

	// resourceGroup is the name of the Resource Group in the Azure
	// subscription that corresponds to the environment.
	resourceGroup string

	// modelName is the name of the model.
	modelName string

	// namespace is used to create the machine and device hostnames.
	namespace instance.Namespace

	// clientOptions holds the Azure SDK client options (cloud, per-call
	// policies, telemetry, transport and retry) built by initEnviron.
	clientOptions policy.ClientOptions
	// credential is the token credential created by initEnviron.
	credential    azcore.TokenCredential

	// mu guards config (Config and SetConfig take it before touching
	// config). NOTE(review): it presumably also guards instanceTypes and
	// commonResourcesCreated - confirm against their accessors.
	mu                     sync.Mutex
	// config is the validated model configuration set by SetConfig.
	config                 *azureModelConfig
	// instanceTypes presumably caches the instance types available to
	// the environment, keyed by name - TODO confirm against
	// getInstanceTypesLocked.
	instanceTypes          map[string]instances.InstanceType
	// commonResourcesCreated presumably records that the common resource
	// deployment has completed - TODO confirm against
	// waitCommonResourcesCreated.
	commonResourcesCreated bool
}

// Compile-time check that *azureEnviron implements environs.Environ.
var _ environs.Environ = (*azureEnviron)(nil)
   125  
   126  // SetCloudSpec is specified in the environs.Environ interface.
   127  func (env *azureEnviron) SetCloudSpec(ctx stdcontext.Context, cloud environscloudspec.CloudSpec) error {
   128  	if err := validateCloudSpec(cloud); err != nil {
   129  		return errors.Annotate(err, "validating cloud spec")
   130  	}
   131  
   132  	env.mu.Lock()
   133  	defer env.mu.Unlock()
   134  
   135  	// The Azure storage code wants the endpoint host only, not the URL.
   136  	storageEndpointURL, err := url.Parse(cloud.StorageEndpoint)
   137  	if err != nil {
   138  		return errors.Annotate(err, "parsing storage endpoint URL")
   139  	}
   140  	env.cloud = cloud
   141  	env.location = canonicalLocation(cloud.Region)
   142  	env.storageEndpoint = storageEndpointURL.Host
   143  
   144  	if err := env.initEnviron(ctx); err != nil {
   145  		return errors.Trace(err)
   146  	}
   147  
   148  	cfg := env.config
   149  	if env.resourceGroup == "" {
   150  		env.resourceGroup = cfg.resourceGroupName
   151  	}
   152  	// If no user specified resource group, make one from the model UUID.
   153  	if env.resourceGroup == "" {
   154  		modelTag := names.NewModelTag(cfg.UUID())
   155  		if env.resourceGroup, err = env.resourceGroupName(ctx, modelTag, cfg.Name()); err != nil {
   156  			return errors.Trace(err)
   157  		}
   158  	}
   159  	env.modelName = cfg.Name()
   160  	return nil
   161  }
   162  
   163  func (env *azureEnviron) initEnviron(ctx stdcontext.Context) error {
   164  	credAttrs := env.cloud.Credential.Attributes()
   165  	env.subscriptionId = credAttrs[credAttrManagedSubscriptionId]
   166  	if env.subscriptionId == "" {
   167  		env.subscriptionId = credAttrs[credAttrSubscriptionId]
   168  	}
   169  
   170  	env.clientOptions = azcore.ClientOptions{
   171  		Cloud: azureCloud(env.cloud.Name, env.cloud.Endpoint, env.cloud.IdentityEndpoint),
   172  		PerCallPolicies: []policy.Policy{
   173  			&tracing.LoggingPolicy{
   174  				Logger: logger.Child("azureapi"),
   175  			},
   176  		},
   177  		Telemetry: policy.TelemetryOptions{
   178  			ApplicationID: "Juju/" + jujuversion.Current.String(),
   179  		},
   180  		Transport: env.provider.config.Sender,
   181  		Retry:     env.provider.config.Retry,
   182  	}
   183  	if env.provider.config.RequestInspector != nil {
   184  		env.clientOptions.PerCallPolicies = append(env.clientOptions.PerCallPolicies, env.provider.config.RequestInspector)
   185  	}
   186  
   187  	tenantID, err := azureauth.DiscoverTenantID(ctx, env.subscriptionId, arm.ClientOptions{
   188  		ClientOptions: env.clientOptions,
   189  	})
   190  	if err != nil {
   191  		return errors.Annotate(err, "getting tenant ID")
   192  	}
   193  	logger.Debugf("discovered tenant id: %s", tenantID)
   194  	env.tenantId = tenantID
   195  
   196  	appId := credAttrs[credAttrAppId]
   197  	appPassword := credAttrs[credAttrAppPassword]
   198  	env.credential, err = env.provider.config.CreateTokenCredential(appId, appPassword, tenantID, env.clientOptions)
   199  	if err != nil {
   200  		return errors.Annotate(err, "set up credential")
   201  	}
   202  	return nil
   203  }
   204  
   205  // PrepareForBootstrap is part of the Environ interface.
   206  func (env *azureEnviron) PrepareForBootstrap(ctx environs.BootstrapContext, _ string) error {
   207  	if ctx.ShouldVerifyCredentials() {
   208  		cloudCtx := &context.CloudCallContext{
   209  			Context:                  ctx.Context(),
   210  			InvalidateCredentialFunc: func(string) error { return nil },
   211  		}
   212  		if err := verifyCredentials(env, cloudCtx); err != nil {
   213  			return errors.Trace(err)
   214  		}
   215  	}
   216  	return nil
   217  }
   218  
   219  // Create is part of the Environ interface.
   220  func (env *azureEnviron) Create(ctx context.ProviderCallContext, args environs.CreateParams) error {
   221  	if err := verifyCredentials(env, ctx); err != nil {
   222  		return errors.Trace(err)
   223  	}
   224  	return errors.Trace(env.initResourceGroup(ctx, args.ControllerUUID, env.config.resourceGroupName != "", false))
   225  }
   226  
   227  // Bootstrap is part of the Environ interface.
   228  func (env *azureEnviron) Bootstrap(
   229  	ctx environs.BootstrapContext,
   230  	callCtx context.ProviderCallContext,
   231  	args environs.BootstrapParams,
   232  ) (*environs.BootstrapResult, error) {
   233  	if err := env.initResourceGroup(callCtx, args.ControllerConfig.ControllerUUID(), env.config.resourceGroupName != "", true); err != nil {
   234  		return nil, errors.Annotate(err, "creating controller resource group")
   235  	}
   236  	result, err := common.Bootstrap(ctx, env, callCtx, args)
   237  	if err != nil {
   238  		logger.Errorf("bootstrap failed, destroying model: %v", err)
   239  
   240  		// First cancel the in-progress deployment.
   241  		var wg sync.WaitGroup
   242  		var cancelResult error
   243  		logger.Debugf("canceling deployment for bootstrap instance")
   244  		wg.Add(1)
   245  		go func(id string) {
   246  			defer wg.Done()
   247  			cancelResult = errors.Annotatef(
   248  				env.cancelDeployment(callCtx, id),
   249  				"canceling deployment %q", id,
   250  			)
   251  		}(names.NewMachineTag(agent.BootstrapControllerId).String())
   252  		wg.Wait()
   253  		if cancelResult != nil && !errors.IsNotFound(cancelResult) {
   254  			return nil, errors.Annotate(cancelResult, "aborting failed bootstrap")
   255  		}
   256  
   257  		// Then cleanup the resource group.
   258  		if err := env.Destroy(callCtx); err != nil {
   259  			logger.Errorf("failed to destroy model: %v", err)
   260  		}
   261  		return nil, errors.Trace(err)
   262  	}
   263  	return result, nil
   264  }
   265  
   266  // initResourceGroup creates a resource group for this environment.
   267  func (env *azureEnviron) initResourceGroup(ctx context.ProviderCallContext, controllerUUID string, existingResourceGroup, controller bool) error {
   268  	env.mu.Lock()
   269  	resourceTags := tags.ResourceTags(
   270  		names.NewModelTag(env.config.Config.UUID()),
   271  		names.NewControllerTag(controllerUUID),
   272  		env.config,
   273  	)
   274  	env.mu.Unlock()
   275  
   276  	resourceGroups, err := env.resourceGroupsClient()
   277  	if err != nil {
   278  		return errors.Trace(err)
   279  	}
   280  	if existingResourceGroup {
   281  		logger.Debugf("using existing resource group %q for model %q", env.resourceGroup, env.modelName)
   282  		g, err := resourceGroups.Get(ctx, env.resourceGroup, nil)
   283  		if err != nil {
   284  			return errorutils.HandleCredentialError(errors.Annotatef(err, "checking resource group %q", env.resourceGroup), ctx)
   285  		}
   286  		if region := toValue(g.Location); region != env.location {
   287  			return errors.Errorf("cannot use resource group in region %q when operating in region %q", region, env.location)
   288  		}
   289  	} else {
   290  		logger.Debugf("creating resource group %q for model %q", env.resourceGroup, env.modelName)
   291  		if _, err := resourceGroups.CreateOrUpdate(ctx, env.resourceGroup, armresources.ResourceGroup{
   292  			Location: to.Ptr(env.location),
   293  			Tags:     toMapPtr(resourceTags),
   294  		}, nil); err != nil {
   295  			return errorutils.HandleCredentialError(errors.Annotate(err, "creating resource group"), ctx)
   296  		}
   297  	}
   298  
   299  	if !controller {
   300  		// When we create a resource group for a non-controller model,
   301  		// we must create the common resources up-front. This is so
   302  		// that parallel deployments do not affect dynamic changes,
   303  		// e.g. those made by the firewaller. For the controller model,
   304  		// we fold the creation of these resources into the bootstrap
   305  		// machine's deployment.
   306  		if err := env.createCommonResourceDeployment(ctx, resourceTags, nil); err != nil {
   307  			return errors.Trace(err)
   308  		}
   309  	}
   310  
   311  	return nil
   312  }
   313  
   314  func (env *azureEnviron) createCommonResourceDeployment(
   315  	ctx context.ProviderCallContext,
   316  	tags map[string]string,
   317  	rules []*armnetwork.SecurityRule,
   318  	commonResources ...armtemplates.Resource,
   319  ) error {
   320  	// Only create network resources if the user has not
   321  	// specified their own to use.
   322  	if env.config.virtualNetworkName == "" {
   323  		networkResources, _ := networkTemplateResources(env.location, tags, nil, rules)
   324  		commonResources = append(commonResources, networkResources...)
   325  	}
   326  	if len(commonResources) == 0 {
   327  		return nil
   328  	}
   329  
   330  	template := armtemplates.Template{Resources: commonResources}
   331  	if err := env.createDeployment(
   332  		ctx,
   333  		env.resourceGroup,
   334  		commonDeployment,
   335  		template,
   336  	); err != nil {
   337  		return errors.Trace(err)
   338  	}
   339  	return nil
   340  }
   341  
   342  // ControllerInstances is specified in the Environ interface.
   343  func (env *azureEnviron) ControllerInstances(ctx context.ProviderCallContext, controllerUUID string) ([]instance.Id, error) {
   344  	inst, err := env.allInstances(ctx, env.resourceGroup, false, controllerUUID)
   345  	if err != nil {
   346  		return nil, err
   347  	}
   348  	if len(inst) == 0 {
   349  		return nil, environs.ErrNoInstances
   350  	}
   351  	ids := make([]instance.Id, len(inst))
   352  	for i, inst := range inst {
   353  		ids[i] = inst.Id()
   354  	}
   355  	return ids, nil
   356  }
   357  
   358  // Config is specified in the Environ interface.
   359  func (env *azureEnviron) Config() *config.Config {
   360  	env.mu.Lock()
   361  	defer env.mu.Unlock()
   362  	return env.config.Config
   363  }
   364  
   365  // SetConfig is specified in the Environ interface.
   366  func (env *azureEnviron) SetConfig(cfg *config.Config) error {
   367  	env.mu.Lock()
   368  	defer env.mu.Unlock()
   369  
   370  	var old *config.Config
   371  	if env.config != nil {
   372  		old = env.config.Config
   373  	}
   374  	ecfg, err := validateConfig(cfg, old)
   375  	if err != nil {
   376  		return err
   377  	}
   378  	env.config = ecfg
   379  
   380  	return nil
   381  }
   382  
// unsupportedConstraints lists the constraints that ConstraintsValidator
// registers as unsupported for this provider.
var unsupportedConstraints = []string{
	constraints.CpuPower,
	constraints.Tags,
	constraints.VirtType,
	constraints.ImageID,
}
   389  
   390  // ConstraintsValidator is defined on the Environs interface.
   391  func (env *azureEnviron) ConstraintsValidator(ctx context.ProviderCallContext) (constraints.Validator, error) {
   392  	instanceTypes, err := env.getInstanceTypes(ctx)
   393  	if err != nil {
   394  		return nil, err
   395  	}
   396  	instTypeNames := make([]string, 0, len(instanceTypes))
   397  	for instTypeName := range instanceTypes {
   398  		instTypeNames = append(instTypeNames, instTypeName)
   399  	}
   400  	sort.Strings(instTypeNames)
   401  
   402  	validator := constraints.NewValidator()
   403  	validator.RegisterUnsupported(unsupportedConstraints)
   404  	validator.RegisterVocabulary(
   405  		constraints.Arch,
   406  		[]string{arch.AMD64},
   407  	)
   408  	validator.RegisterVocabulary(
   409  		constraints.InstanceType,
   410  		instTypeNames,
   411  	)
   412  	validator.RegisterConflicts(
   413  		[]string{constraints.InstanceType},
   414  		[]string{
   415  			constraints.Mem,
   416  			constraints.Cores,
   417  			// TODO: move to a dynamic conflict for arch when azure supports more than amd64
   418  			//constraints.Arch,
   419  		},
   420  	)
   421  	return validator, nil
   422  }
   423  
   424  // PrecheckInstance is defined on the environs.InstancePrechecker interface.
   425  func (env *azureEnviron) PrecheckInstance(ctx context.ProviderCallContext, args environs.PrecheckInstanceParams) error {
   426  	if _, err := env.findPlacementSubnet(ctx, args.Placement); err != nil {
   427  		return errors.Trace(err)
   428  	}
   429  	if !args.Constraints.HasInstanceType() {
   430  		return nil
   431  	}
   432  	// Constraint has an instance-type constraint so let's see if it is valid.
   433  	instanceTypes, err := env.getInstanceTypes(ctx)
   434  	if err != nil {
   435  		return err
   436  	}
   437  	for _, instanceType := range instanceTypes {
   438  		if instanceType.Name == *args.Constraints.InstanceType {
   439  			return nil
   440  		}
   441  	}
   442  	return fmt.Errorf("invalid instance type %q", *args.Constraints.InstanceType)
   443  }
   444  
// StartInstance is specified in the InstanceBroker interface.
//
// It snapshots the configuration-dependent inputs under env.mu, settles the
// root disk size, and then makes up to 15 attempts to find an instance spec
// and start an instance with it. An attempt that fails with a quota
// exceeded error is retried after deleteInstanceFamily has been applied to
// the local instance type map; any other outcome is returned immediately.
func (env *azureEnviron) StartInstance(ctx context.ProviderCallContext, args environs.StartInstanceParams) (*environs.StartInstanceResult, error) {
	if args.ControllerUUID == "" {
		return nil, errors.New("missing controller UUID")
	}

	// Get the required configuration and config-dependent information
	// required to create the instance. We take the lock just once, to
	// ensure we obtain all information based on the same configuration.
	env.mu.Lock()
	envTags := tags.ResourceTags(
		names.NewModelTag(env.config.Config.UUID()),
		names.NewControllerTag(args.ControllerUUID),
		env.config,
	)
	imageStream := env.config.ImageStream()
	envInstanceTypes, err := env.getInstanceTypesLocked(ctx)
	if err != nil {
		env.mu.Unlock()
		return nil, errors.Trace(err)
	}
	// Work on a private copy of the instance types: entries are removed
	// from it below on quota errors, and that must not alter the map
	// returned by getInstanceTypesLocked.
	instanceTypes := make(map[string]instances.InstanceType)
	for k, v := range envInstanceTypes {
		instanceTypes[k] = v
	}
	env.mu.Unlock()

	// If the user has not specified a root-disk size, or has specified
	// one that is not above the minimum, use the minimum and record it
	// in the constraints.
	var rootDisk uint64
	// Azure complains if we try and specify a root disk size less than the minimum.
	// See http://pad.lv/1645408
	if args.Constraints.RootDisk != nil && *args.Constraints.RootDisk > minRootDiskSize {
		rootDisk = *args.Constraints.RootDisk
	} else {
		rootDisk = minRootDiskSize
		args.Constraints.RootDisk = &rootDisk
	}
	// Start the instance - if we get a quota error, that instance type is ignored
	// and we'll try the next most expensive one, up to a reasonable number of attempts.
	// Note: this local shadows the imported arch package for the rest
	// of the function.
	arch, err := args.Tools.OneArch()
	if err != nil {
		return nil, errors.Trace(err)
	}
	for i := 0; i < 15; i++ {
		// Identify the instance type and image to provision.
		instanceSpec, err := env.findInstanceSpec(
			ctx,
			instanceTypes,
			&instances.InstanceConstraint{
				Region:      env.location,
				Base:        args.InstanceConfig.Base,
				Arch:        arch,
				Constraints: args.Constraints,
			},
			imageStream,
		)
		if err != nil {
			return nil, err
		}
		if rootDisk < instanceSpec.InstanceType.RootDisk {
			// The InstanceType's RootDisk is set to the maximum
			// OS disk size; override it with the user-specified
			// or default root disk size.
			instanceSpec.InstanceType.RootDisk = rootDisk
		}
		result, err := env.startInstance(ctx, args, instanceSpec, envTags)
		quotaErr, ok := errorutils.MaybeQuotaExceededError(err)
		if ok {
			// Quota exhausted for this instance type: drop it
			// (presumably together with the rest of its family -
			// see deleteInstanceFamily) and try again.
			logger.Warningf("%v quota exceeded error: %q", instanceSpec.InstanceType.Name, quotaErr.Error())
			deleteInstanceFamily(instanceTypes, instanceSpec.InstanceType.Name)
			continue
		}
		return result, errorutils.SimpleError(err)
	}
	return nil, errors.New("no suitable instance type found for this subscription")
}
// startInstance makes a single attempt to provision a VM for the given
// instance spec. It selects agent binaries matching the image architecture,
// finalises the instance config, creates the virtual machine (retrying once
// without creating the availability set if the first attempt reports a
// conflict), and returns the new instance with its hardware
// characteristics. If creation fails, StopInstances is called for the VM
// name before the error is returned.
func (env *azureEnviron) startInstance(
	ctx context.ProviderCallContext, args environs.StartInstanceParams,
	instanceSpec *instances.InstanceSpec, envTags map[string]string,
) (*environs.StartInstanceResult, error) {

	// Pick tools by filtering the available tools down to the architecture of
	// the image that will be provisioned.
	selectedTools, err := args.Tools.Match(tools.Filter{
		Arch: instanceSpec.Image.Arch,
	})
	if err != nil {
		return nil, errors.Trace(err)
	}
	logger.Infof("picked agent binaries %q", selectedTools[0].Version)

	// Finalize the instance config, which we'll render to CustomData below.
	if err := args.InstanceConfig.SetTools(selectedTools); err != nil {
		return nil, errors.Trace(err)
	}
	if err := instancecfg.FinishInstanceConfig(
		args.InstanceConfig, env.Config(),
	); err != nil {
		return nil, err
	}

	vmName, err := env.namespace.Hostname(args.InstanceConfig.MachineId)
	if err != nil {
		return nil, errors.Trace(err)
	}

	// Copy the instance config tags so that adding the machine name tag
	// below does not modify args.InstanceConfig.Tags.
	vmTags := make(map[string]string)
	for k, v := range args.InstanceConfig.Tags {
		vmTags[k] = v
	}
	// jujuMachineNameTag identifies the VM name, in which is encoded
	// the Juju machine name. We tag all resources related to the
	// machine with this.
	vmTags[jujuMachineNameTag] = vmName

	// Use a public IP by default unless a constraint
	// explicitly forbids it.
	usePublicIP := true
	if args.Constraints.HasAllocatePublicIP() {
		usePublicIP = *args.Constraints.AllocatePublicIP
	}
	// First attempt: also create the availability set.
	err = env.createVirtualMachine(
		ctx, vmName, vmTags, envTags,
		instanceSpec, args, usePublicIP, true,
	)
	// If there's a conflict, it's because another machine is
	// being provisioned with the same availability set so
	// retry and do not create the availability set.
	if errorutils.IsConflictError(err) {
		logger.Debugf("conflict creating %s, retrying...", vmName)
		err = env.createVirtualMachine(
			ctx, vmName, vmTags, envTags,
			instanceSpec, args, usePublicIP, false,
		)
	}
	if err != nil {
		// Best-effort cleanup; a cleanup failure is only logged and
		// the creation error is what gets returned.
		logger.Debugf("creating instance failed, destroying: %v", err)
		if err := env.StopInstances(ctx, instance.Id(vmName)); err != nil {
			logger.Errorf("could not destroy failed virtual machine: %v", err)
		}
		return nil, errors.Annotatef(err, "creating virtual machine %q", vmName)
	}

	// Note: the instance is initialised without addresses to keep the
	// API chatter down. We will refresh the instance if we need to know
	// the addresses.
	inst := &azureInstance{
		vmName:            vmName,
		provisioningState: armresources.ProvisioningStateCreating,
		env:               env,
	}
	// The reported architecture is always amd64, regardless of
	// instanceSpec.Image.Arch.
	amd64 := arch.AMD64
	hc := &instance.HardwareCharacteristics{
		Arch:     &amd64,
		Mem:      &instanceSpec.InstanceType.Mem,
		RootDisk: &instanceSpec.InstanceType.RootDisk,
		CpuCores: &instanceSpec.InstanceType.CpuCores,
	}
	return &environs.StartInstanceResult{
		Instance: inst,
		Hardware: hc,
	}, nil
}
   609  
   610  // referenceInfo splits a reference to an Azure entity into an
   611  // optional resource group and name, or just name if no
   612  // resource group is specified.
   613  func referenceInfo(entityRef string) (entityRG, entityName string) {
   614  	parts := strings.Split(entityRef, "/")
   615  	if len(parts) == 1 {
   616  		return "", entityRef
   617  	}
   618  	return parts[0], parts[1]
   619  }
   620  
   621  // createVirtualMachine creates a virtual machine and related resources.
   622  //
   623  // All resources created are tagged with the specified "vmTags", so if
   624  // this function fails then all resources can be deleted by tag.
   625  func (env *azureEnviron) createVirtualMachine(
   626  	ctx context.ProviderCallContext,
   627  	vmName string,
   628  	vmTags, envTags map[string]string,
   629  	instanceSpec *instances.InstanceSpec,
   630  	args environs.StartInstanceParams,
   631  	usePublicIP bool,
   632  	createAvailabilitySet bool,
   633  ) error {
   634  	instanceConfig := args.InstanceConfig
   635  	apiPorts := make([]int, 0, 2)
   636  	if instanceConfig.IsController() {
   637  		apiPorts = append(apiPorts, instanceConfig.ControllerConfig.APIPort())
   638  		if instanceConfig.ControllerConfig.AutocertDNSName() != "" {
   639  			// Open port 80 as well as it handles Let's Encrypt HTTP challenge.
   640  			apiPorts = append(apiPorts, 80)
   641  		}
   642  	} else {
   643  		ports := instanceConfig.APIInfo.Ports()
   644  		if len(ports) != 1 {
   645  			return errors.Errorf("expected one API port, found %v", ports)
   646  		}
   647  		apiPorts = append(apiPorts, ports[0])
   648  	}
   649  
   650  	var nicDependsOn, vmDependsOn []string
   651  	var res []armtemplates.Resource
   652  	bootstrapping := instanceConfig.Bootstrap != nil
   653  	// We only need to deal with creating network resources
   654  	// if the user has not specified their own to use.
   655  	if bootstrapping && env.config.virtualNetworkName == "" && args.Placement == "" {
   656  		// We're starting the bootstrap machine, so we will create the
   657  		// networking resources in the same deployment.
   658  		networkResources, dependsOn := networkTemplateResources(env.location, envTags, apiPorts, nil)
   659  		res = append(res, networkResources...)
   660  		nicDependsOn = append(nicDependsOn, dependsOn...)
   661  	}
   662  	if !bootstrapping {
   663  		// Wait for the common resource deployment to complete.
   664  		if err := env.waitCommonResourcesCreated(ctx); err != nil {
   665  			return errors.Annotate(
   666  				err, "waiting for common resources to be created",
   667  			)
   668  		}
   669  	}
   670  
   671  	osProfile, seriesOS, err := newOSProfile(
   672  		vmName, instanceConfig,
   673  		env.provider.config.GenerateSSHKey,
   674  	)
   675  	if err != nil {
   676  		return errors.Annotate(err, "creating OS profile")
   677  	}
   678  	storageProfile, err := newStorageProfile(
   679  		vmName,
   680  		instanceSpec,
   681  	)
   682  	if err != nil {
   683  		return errors.Annotate(err, "creating storage profile")
   684  	}
   685  	diskEncryptionID, err := env.diskEncryptionInfo(ctx, args.RootDisk, envTags)
   686  	if err != nil {
   687  		return environs.ZoneIndependentError(fmt.Errorf("creating disk encryption info: %w", err))
   688  	}
   689  	if diskEncryptionID != "" && storageProfile.OSDisk.ManagedDisk != nil {
   690  		storageProfile.OSDisk.ManagedDisk.DiskEncryptionSet = &armcompute.DiskEncryptionSetParameters{
   691  			ID: to.Ptr(diskEncryptionID),
   692  		}
   693  	}
   694  
   695  	var availabilitySetSubResource *armcompute.SubResource
   696  	availabilitySetName, err := availabilitySetName(
   697  		vmName, vmTags, instanceConfig.IsController(),
   698  	)
   699  	if err != nil {
   700  		return errors.Annotate(err, "getting availability set name")
   701  	}
   702  	availabilitySetId := fmt.Sprintf(
   703  		`[resourceId('Microsoft.Compute/availabilitySets','%s')]`,
   704  		availabilitySetName,
   705  	)
   706  	if availabilitySetName != "" {
   707  		availabilitySetSubResource = &armcompute.SubResource{
   708  			ID: to.Ptr(availabilitySetId),
   709  		}
   710  	}
   711  	if !createAvailabilitySet && availabilitySetName != "" {
   712  		availabilitySet, err := env.availabilitySetsClient()
   713  		if err != nil {
   714  			return errors.Trace(err)
   715  		}
   716  		if _, err = availabilitySet.Get(ctx, env.resourceGroup, availabilitySetName, nil); err != nil {
   717  			return errors.Annotatef(err, "expecting availability set %q to be available", availabilitySetName)
   718  		}
   719  	}
   720  	if createAvailabilitySet && availabilitySetName != "" {
   721  		availabilitySetProperties := &armcompute.AvailabilitySetProperties{
   722  			// Azure complains when the fault domain count
   723  			// is not specified, even though it is meant
   724  			// to be optional and default to the maximum.
   725  			// The maximum depends on the location, and
   726  			// there is no API to query it.
   727  			PlatformFaultDomainCount: to.Ptr(maxFaultDomains(env.location)),
   728  		}
   729  		res = append(res, armtemplates.Resource{
   730  			APIVersion: computeAPIVersion,
   731  			Type:       "Microsoft.Compute/availabilitySets",
   732  			Name:       availabilitySetName,
   733  			Location:   env.location,
   734  			Tags:       envTags,
   735  			Properties: availabilitySetProperties,
   736  			Sku:        &armtemplates.Sku{Name: "Aligned"},
   737  		})
   738  		vmDependsOn = append(vmDependsOn, availabilitySetId)
   739  	}
   740  
   741  	placementSubnetID, err := env.findPlacementSubnet(ctx, args.Placement)
   742  	if err != nil {
   743  		return environs.ZoneIndependentError(err)
   744  	}
   745  	vnetId, subnetIds, err := env.networkInfoForInstance(ctx, args, bootstrapping, instanceConfig.IsController(), placementSubnetID)
   746  	if err != nil {
   747  		return environs.ZoneIndependentError(err)
   748  	}
   749  	logger.Debugf("creating instance using vnet %v, subnets %q", vnetId, subnetIds)
   750  
   751  	if env.config.virtualNetworkName == "" && bootstrapping {
   752  		nicDependsOn = append(nicDependsOn, vnetId)
   753  	}
   754  
   755  	var publicIPAddressId string
   756  	if usePublicIP {
   757  		publicIPAddressName := vmName + "-public-ip"
   758  		publicIPAddressId = fmt.Sprintf(`[resourceId('Microsoft.Network/publicIPAddresses', '%s')]`, publicIPAddressName)
   759  		// Default to static public IP so address is preserved across reboots.
   760  		publicIPAddressAllocationMethod := armnetwork.IPAllocationMethodStatic
   761  		if env.config.loadBalancerSkuName == string(armnetwork.LoadBalancerSKUNameBasic) {
   762  			publicIPAddressAllocationMethod = armnetwork.IPAllocationMethodDynamic // preserve the settings that were used in Juju 2.4 and earlier
   763  		}
   764  		res = append(res, armtemplates.Resource{
   765  			APIVersion: networkAPIVersion,
   766  			Type:       "Microsoft.Network/publicIPAddresses",
   767  			Name:       publicIPAddressName,
   768  			Location:   env.location,
   769  			Tags:       vmTags,
   770  			Sku:        &armtemplates.Sku{Name: env.config.loadBalancerSkuName},
   771  			Properties: &armnetwork.PublicIPAddressPropertiesFormat{
   772  				PublicIPAddressVersion:   to.Ptr(armnetwork.IPVersionIPv4),
   773  				PublicIPAllocationMethod: to.Ptr(publicIPAddressAllocationMethod),
   774  			},
   775  		})
   776  	}
   777  
   778  	// Create one NIC per subnet. The first one is the primary and has
   779  	// the public IP address if so configured.
   780  	var nics []*armcompute.NetworkInterfaceReference
   781  	for i, subnetID := range subnetIds {
   782  		primary := i == 0
   783  		ipConfig := &armnetwork.InterfaceIPConfigurationPropertiesFormat{
   784  			Primary:                   to.Ptr(primary),
   785  			PrivateIPAllocationMethod: to.Ptr(armnetwork.IPAllocationMethodDynamic),
   786  			Subnet:                    &armnetwork.Subnet{ID: to.Ptr(string(subnetID))},
   787  		}
   788  		if primary && usePublicIP {
   789  			ipConfig.PublicIPAddress = &armnetwork.PublicIPAddress{
   790  				ID: to.Ptr(publicIPAddressId),
   791  			}
   792  			nicDependsOn = append(nicDependsOn, publicIPAddressId)
   793  		}
   794  		ipConfigName := "primary"
   795  		if i > 0 {
   796  			ipConfigName = fmt.Sprintf("interface-%d", i)
   797  		}
   798  		nicName := vmName + "-" + ipConfigName
   799  		nicId := fmt.Sprintf(`[resourceId('Microsoft.Network/networkInterfaces', '%s')]`, nicName)
   800  		ipConfigurations := []*armnetwork.InterfaceIPConfiguration{{
   801  			Name:       to.Ptr(ipConfigName),
   802  			Properties: ipConfig,
   803  		}}
   804  		res = append(res, armtemplates.Resource{
   805  			APIVersion: networkAPIVersion,
   806  			Type:       "Microsoft.Network/networkInterfaces",
   807  			Name:       nicName,
   808  			Location:   env.location,
   809  			Tags:       vmTags,
   810  			Properties: &armnetwork.InterfacePropertiesFormat{
   811  				IPConfigurations: ipConfigurations,
   812  			},
   813  			DependsOn: nicDependsOn,
   814  		})
   815  		vmDependsOn = append(vmDependsOn, nicId)
   816  
   817  		nics = append(nics, &armcompute.NetworkInterfaceReference{
   818  			ID: to.Ptr(nicId),
   819  			Properties: &armcompute.NetworkInterfaceReferenceProperties{
   820  				Primary: to.Ptr(primary),
   821  			},
   822  		})
   823  	}
   824  
   825  	res = append(res, armtemplates.Resource{
   826  		APIVersion: computeAPIVersion,
   827  		Type:       "Microsoft.Compute/virtualMachines",
   828  		Name:       vmName,
   829  		Location:   env.location,
   830  		Tags:       vmTags,
   831  		Properties: &armcompute.VirtualMachineProperties{
   832  			HardwareProfile: &armcompute.HardwareProfile{
   833  				VMSize: to.Ptr(armcompute.VirtualMachineSizeTypes(
   834  					instanceSpec.InstanceType.Name,
   835  				)),
   836  			},
   837  			StorageProfile: storageProfile,
   838  			OSProfile:      osProfile,
   839  			NetworkProfile: &armcompute.NetworkProfile{
   840  				NetworkInterfaces: nics,
   841  			},
   842  			AvailabilitySet: availabilitySetSubResource,
   843  		},
   844  		DependsOn: vmDependsOn,
   845  	})
   846  
   847  	// On CentOS, we must add the CustomScript VM extension to run the
   848  	// CustomData script.
   849  	if seriesOS == ostype.CentOS {
   850  		properties, err := vmExtensionProperties(seriesOS)
   851  		if err != nil {
   852  			return errors.Annotate(
   853  				err, "creating virtual machine extension",
   854  			)
   855  		}
   856  		res = append(res, armtemplates.Resource{
   857  			APIVersion: computeAPIVersion,
   858  			Type:       "Microsoft.Compute/virtualMachines/extensions",
   859  			Name:       vmName + "/" + extensionName,
   860  			Location:   env.location,
   861  			Tags:       vmTags,
   862  			Properties: properties,
   863  			DependsOn:  []string{"Microsoft.Compute/virtualMachines/" + vmName},
   864  		})
   865  	}
   866  
   867  	logger.Debugf("- creating virtual machine deployment in %q", env.resourceGroup)
   868  	template := armtemplates.Template{Resources: res}
   869  	if err := env.createDeployment(
   870  		ctx,
   871  		env.resourceGroup,
   872  		vmName, // deployment name
   873  		template,
   874  	); err != nil {
   875  		return errors.Trace(err)
   876  	}
   877  	return nil
   878  }
   879  
   880  // maxFaultDomains returns the maximum number of fault domains for the
   881  // given location/region. The numbers were taken from
   882  // https://docs.microsoft.com/en-au/azure/virtual-machines/windows/manage-availability,
   883  // as at 31 August 2017.
   884  func maxFaultDomains(location string) int32 {
   885  	// From the page linked in the doc comment:
   886  	// "The number of fault domains for managed availability sets varies
   887  	// by region - either two or three per region."
   888  	//
   889  	// We record those that at the time of writing have 3. Anything
   890  	// else has at least 2, so we just assume 2.
   891  	switch location {
   892  	case
   893  		"eastus",
   894  		"eastus2",
   895  		"westus",
   896  		"centralus",
   897  		"northcentralus",
   898  		"southcentralus",
   899  		"northeurope",
   900  		"westeurope":
   901  		return 3
   902  	}
   903  	return 2
   904  }
   905  
   906  // waitCommonResourcesCreated waits for the "common" deployment to complete.
   907  func (env *azureEnviron) waitCommonResourcesCreated(ctx context.ProviderCallContext) error {
   908  	env.mu.Lock()
   909  	defer env.mu.Unlock()
   910  	if env.commonResourcesCreated {
   911  		return nil
   912  	}
   913  	if _, err := env.waitCommonResourcesCreatedLocked(ctx); err != nil {
   914  		return errors.Trace(err)
   915  	}
   916  	env.commonResourcesCreated = true
   917  	return nil
   918  }
   919  
// deploymentIncompleteError wraps an error to mark it as meaning that a
// deployment has not yet reached a terminal state.
// waitCommonResourcesCreatedLocked uses it to tell errors that should be
// retried apart from fatal ones.
type deploymentIncompleteError struct {
	error
}
   923  
   924  func (env *azureEnviron) waitCommonResourcesCreatedLocked(ctx context.ProviderCallContext) (*armresources.DeploymentExtended, error) {
   925  	// Release the lock while we're waiting, to avoid blocking others.
   926  	env.mu.Unlock()
   927  	defer env.mu.Lock()
   928  
   929  	deploy, err := env.deployClient()
   930  	if err != nil {
   931  		return nil, errors.Trace(err)
   932  	}
   933  	// Wait for up to 5 minutes, with a 5 second polling interval,
   934  	// for the "common" deployment to be in one of the terminal
   935  	// states. The deployment typically takes only around 30 seconds,
   936  	// but we allow for a longer duration to be defensive.
   937  	var deployment *armresources.DeploymentExtended
   938  	waitDeployment := func() error {
   939  		result, err := deploy.Get(ctx, env.resourceGroup, commonDeployment, nil)
   940  		if err != nil {
   941  			if errorutils.IsNotFoundError(err) {
   942  				// The controller model, and also models with bespoke
   943  				// networks, do not have a "common" deployment
   944  				// For controller models, common resources are created
   945  				// in the machine-0 deployment to keep bootstrap times optimal.
   946  				return nil
   947  			}
   948  			return errors.Annotate(err, "querying common deployment")
   949  		}
   950  		if result.Properties == nil {
   951  			return deploymentIncompleteError{errors.New("deployment incomplete")}
   952  		}
   953  
   954  		state := toValue(result.Properties.ProvisioningState)
   955  		if state == armresources.ProvisioningStateSucceeded {
   956  			// The deployment has succeeded, so the resources are
   957  			// ready for use.
   958  			deployment = to.Ptr(result.DeploymentExtended)
   959  			return nil
   960  		}
   961  		err = errors.Errorf("%q resource deployment status is %q", commonDeployment, state)
   962  		switch state {
   963  		case armresources.ProvisioningStateCanceled,
   964  			armresources.ProvisioningStateFailed,
   965  			armresources.ProvisioningStateDeleted:
   966  		default:
   967  			err = deploymentIncompleteError{err}
   968  		}
   969  		return err
   970  	}
   971  	if err := retry.Call(retry.CallArgs{
   972  		Func: waitDeployment,
   973  		IsFatalError: func(err error) bool {
   974  			_, ok := err.(deploymentIncompleteError)
   975  			return !ok
   976  		},
   977  		Attempts:    -1,
   978  		Delay:       5 * time.Second,
   979  		MaxDuration: 5 * time.Minute,
   980  		Clock:       env.provider.config.RetryClock,
   981  	}); err != nil {
   982  		return nil, errors.Trace(err)
   983  	}
   984  	return deployment, nil
   985  }
   986  
   987  // createAvailabilitySet creates the availability set for a machine to use
   988  // if it doesn't already exist, and returns the availability set's ID. The
   989  // algorithm used for choosing the availability set is:
   990  //   - if the machine is a controller, use the availability set name
   991  //     "juju-controller";
   992  //   - if the machine has units assigned, create an availability
   993  //     name with a name based on the value of the tags.JujuUnitsDeployed tag
   994  //     in vmTags, if it exists;
   995  //   - otherwise, do not assign the machine to an availability set
   996  func availabilitySetName(
   997  	vmName string,
   998  	vmTags map[string]string,
   999  	controller bool,
  1000  ) (string, error) {
  1001  	logger.Debugf("selecting availability set for %q", vmName)
  1002  	if controller {
  1003  		return controllerAvailabilitySet, nil
  1004  	}
  1005  
  1006  	// We'll have to create an availability set. Use the name of one of the
  1007  	// services assigned to the machine.
  1008  	var availabilitySetName string
  1009  	if unitNames, ok := vmTags[tags.JujuUnitsDeployed]; ok {
  1010  		for _, unitName := range strings.Fields(unitNames) {
  1011  			if !names.IsValidUnit(unitName) {
  1012  				continue
  1013  			}
  1014  			serviceName, err := names.UnitApplication(unitName)
  1015  			if err != nil {
  1016  				return "", errors.Annotate(err, "getting application name")
  1017  			}
  1018  			availabilitySetName = serviceName
  1019  			break
  1020  		}
  1021  	}
  1022  	return availabilitySetName, nil
  1023  }
  1024  
  1025  // newStorageProfile creates the storage profile for a virtual machine,
  1026  // based on the series and chosen instance spec.
  1027  func newStorageProfile(
  1028  	vmName string,
  1029  	instanceSpec *instances.InstanceSpec,
  1030  ) (*armcompute.StorageProfile, error) {
  1031  	logger.Debugf("creating storage profile for %q", vmName)
  1032  
  1033  	urnParts := strings.SplitN(instanceSpec.Image.Id, ":", 4)
  1034  	if len(urnParts) != 4 {
  1035  		return nil, errors.Errorf("invalid image ID %q", instanceSpec.Image.Id)
  1036  	}
  1037  	publisher := urnParts[0]
  1038  	offer := urnParts[1]
  1039  	sku := urnParts[2]
  1040  	vers := urnParts[3]
  1041  
  1042  	osDiskName := vmName
  1043  	osDiskSizeGB := mibToGB(instanceSpec.InstanceType.RootDisk)
  1044  	osDisk := &armcompute.OSDisk{
  1045  		Name:         to.Ptr(osDiskName),
  1046  		CreateOption: to.Ptr(armcompute.DiskCreateOptionTypesFromImage),
  1047  		Caching:      to.Ptr(armcompute.CachingTypesReadWrite),
  1048  		DiskSizeGB:   to.Ptr(int32(osDiskSizeGB)),
  1049  		ManagedDisk: &armcompute.ManagedDiskParameters{
  1050  			StorageAccountType: to.Ptr(armcompute.StorageAccountTypesStandardLRS),
  1051  		},
  1052  	}
  1053  
  1054  	return &armcompute.StorageProfile{
  1055  		ImageReference: &armcompute.ImageReference{
  1056  			Publisher: to.Ptr(publisher),
  1057  			Offer:     to.Ptr(offer),
  1058  			SKU:       to.Ptr(sku),
  1059  			Version:   to.Ptr(vers),
  1060  		},
  1061  		OSDisk: osDisk,
  1062  	}, nil
  1063  }
  1064  
  1065  func mibToGB(mib uint64) uint64 {
  1066  	b := float64(mib * 1024 * 1024)
  1067  	return uint64(b / (1000 * 1000 * 1000))
  1068  }
  1069  
  1070  func newOSProfile(
  1071  	vmName string,
  1072  	instanceConfig *instancecfg.InstanceConfig,
  1073  	generateSSHKey func(string) (string, string, error),
  1074  ) (*armcompute.OSProfile, ostype.OSType, error) {
  1075  	logger.Debugf("creating OS profile for %q", vmName)
  1076  
  1077  	customData, err := providerinit.ComposeUserData(instanceConfig, nil, AzureRenderer{})
  1078  	if err != nil {
  1079  		return nil, ostype.Unknown, errors.Annotate(err, "composing user data")
  1080  	}
  1081  
  1082  	osProfile := &armcompute.OSProfile{
  1083  		ComputerName: to.Ptr(vmName),
  1084  		CustomData:   to.Ptr(string(customData)),
  1085  	}
  1086  
  1087  	instOS := ostype.OSTypeForName(instanceConfig.Base.OS)
  1088  	if err != nil {
  1089  		return nil, ostype.Unknown, errors.Trace(err)
  1090  	}
  1091  	switch instOS {
  1092  	case ostype.Ubuntu, ostype.CentOS:
  1093  		// SSH keys are handled by custom data, but must also be
  1094  		// specified in order to forego providing a password, and
  1095  		// disable password authentication.
  1096  		authorizedKeys := instanceConfig.AuthorizedKeys
  1097  		if len(authorizedKeys) == 0 {
  1098  			// Azure requires that machines be provisioned with
  1099  			// either a password or at least one SSH key. We
  1100  			// generate a key-pair to make Azure happy, but throw
  1101  			// away the private key so that nobody will be able
  1102  			// to log into the machine directly unless the keys
  1103  			// are updated with one that Juju tracks.
  1104  			_, public, err := generateSSHKey("")
  1105  			if err != nil {
  1106  				return nil, ostype.Unknown, errors.Trace(err)
  1107  			}
  1108  			authorizedKeys = public
  1109  		}
  1110  
  1111  		publicKeys := []*armcompute.SSHPublicKey{{
  1112  			Path:    to.Ptr("/home/ubuntu/.ssh/authorized_keys"),
  1113  			KeyData: to.Ptr(authorizedKeys),
  1114  		}}
  1115  		osProfile.AdminUsername = to.Ptr("ubuntu")
  1116  		osProfile.LinuxConfiguration = &armcompute.LinuxConfiguration{
  1117  			DisablePasswordAuthentication: to.Ptr(true),
  1118  			SSH:                           &armcompute.SSHConfiguration{PublicKeys: publicKeys},
  1119  		}
  1120  	default:
  1121  		return nil, ostype.Unknown, errors.NotSupportedf("%s", instOS)
  1122  	}
  1123  	return osProfile, instOS, nil
  1124  }
  1125  
  1126  // StopInstances is specified in the InstanceBroker interface.
  1127  func (env *azureEnviron) StopInstances(ctx context.ProviderCallContext, ids ...instance.Id) error {
  1128  	if len(ids) == 0 {
  1129  		return nil
  1130  	}
  1131  
  1132  	// First up, cancel the deployments. Then we can identify the resources
  1133  	// that need to be deleted without racing with their creation.
  1134  	var wg sync.WaitGroup
  1135  	var existing int
  1136  	cancelResults := make([]error, len(ids))
  1137  	for i, id := range ids {
  1138  		logger.Debugf("canceling deployment for instance %q", id)
  1139  		wg.Add(1)
  1140  		go func(i int, id instance.Id) {
  1141  			defer wg.Done()
  1142  			cancelResults[i] = errors.Annotatef(
  1143  				env.cancelDeployment(ctx, string(id)),
  1144  				"canceling deployment %q", id,
  1145  			)
  1146  		}(i, id)
  1147  	}
  1148  	wg.Wait()
  1149  	for _, err := range cancelResults {
  1150  		if err == nil {
  1151  			existing++
  1152  		} else if !errors.IsNotFound(err) {
  1153  			return err
  1154  		}
  1155  	}
  1156  	if existing == 0 {
  1157  		// None of the instances exist, so we can stop now.
  1158  		return nil
  1159  	}
  1160  
  1161  	// List network interfaces and public IP addresses.
  1162  	instanceNics, err := env.instanceNetworkInterfaces(
  1163  		ctx,
  1164  		env.resourceGroup,
  1165  	)
  1166  	if err != nil {
  1167  		return errors.Trace(err)
  1168  	}
  1169  	instancePips, err := env.instancePublicIPAddresses(
  1170  		ctx,
  1171  		env.resourceGroup,
  1172  	)
  1173  	if err != nil {
  1174  		return errors.Trace(err)
  1175  	}
  1176  
  1177  	// Delete the deployments, virtual machines, and related armresources.
  1178  	deleteResults := make([]error, len(ids))
  1179  	for i, id := range ids {
  1180  		if errors.IsNotFound(cancelResults[i]) {
  1181  			continue
  1182  		}
  1183  		// The deployment does not exist, so there's nothing more to do.
  1184  		logger.Debugf("deleting instance %q", id)
  1185  		wg.Add(1)
  1186  		go func(i int, id instance.Id) {
  1187  			defer wg.Done()
  1188  			err := env.deleteVirtualMachine(
  1189  				ctx,
  1190  				id,
  1191  				instanceNics[id],
  1192  				instancePips[id],
  1193  			)
  1194  			deleteResults[i] = errors.Annotatef(
  1195  				err, "deleting instance %q", id,
  1196  			)
  1197  		}(i, id)
  1198  	}
  1199  	wg.Wait()
  1200  	for _, err := range deleteResults {
  1201  		if err != nil && !errors.IsNotFound(err) {
  1202  			return errors.Trace(err)
  1203  		}
  1204  	}
  1205  
  1206  	return nil
  1207  }
  1208  
  1209  // cancelDeployment cancels a template deployment.
  1210  func (env *azureEnviron) cancelDeployment(ctx context.ProviderCallContext, name string) error {
  1211  	logger.Debugf("- canceling deployment %q", name)
  1212  	deploy, err := env.deployClient()
  1213  	if err != nil {
  1214  		return errors.Trace(err)
  1215  	}
  1216  	_, err = deploy.Cancel(ctx, env.resourceGroup, name, nil)
  1217  	if err != nil {
  1218  		if errorutils.IsNotFoundError(err) {
  1219  			return errors.NewNotFound(err, fmt.Sprintf("deployment %q not found", name))
  1220  		}
  1221  		// Deployments can only canceled while they're running.
  1222  		if isDeployConflictError(err) {
  1223  			return nil
  1224  		}
  1225  		return errorutils.HandleCredentialError(errors.Annotatef(err, "canceling deployment %q", name), ctx)
  1226  	}
  1227  	return nil
  1228  }
  1229  
  1230  func isDeployConflictError(err error) bool {
  1231  	if errorutils.IsConflictError(err) {
  1232  		code := errorutils.ErrorCode(err)
  1233  		if code == serviceErrorCodeDeploymentCannotBeCancelled ||
  1234  			code == serviceErrorCodeResourceGroupBeingDeleted {
  1235  			return true
  1236  		}
  1237  	}
  1238  	return false
  1239  }
  1240  
  1241  // deleteVirtualMachine deletes a virtual machine and all of the resources that
  1242  // it owns, and any corresponding network security rules.
  1243  func (env *azureEnviron) deleteVirtualMachine(
  1244  	ctx context.ProviderCallContext,
  1245  	instId instance.Id,
  1246  	networkInterfaces []*armnetwork.Interface,
  1247  	publicIPAddresses []*armnetwork.PublicIPAddress,
  1248  ) error {
  1249  	vmName := string(instId)
  1250  
  1251  	// TODO(axw) delete resources concurrently.
  1252  
  1253  	compute, err := env.computeClient()
  1254  	if err != nil {
  1255  		return errors.Trace(err)
  1256  	}
  1257  	// The VM must be deleted first, to release the lock on its armresources.
  1258  	logger.Debugf("- deleting virtual machine (%s)", vmName)
  1259  	poller, err := compute.BeginDelete(ctx, env.resourceGroup, vmName, nil)
  1260  	if err == nil {
  1261  		_, err = poller.PollUntilDone(ctx, nil)
  1262  	}
  1263  	if err != nil {
  1264  		if errorutils.MaybeInvalidateCredential(err, ctx) || !errorutils.IsNotFoundError(err) {
  1265  			return errors.Annotate(err, "deleting virtual machine")
  1266  		}
  1267  	}
  1268  	// Delete the managed OS disk.
  1269  	logger.Debugf("- deleting OS disk (%s)", vmName)
  1270  	disks, err := env.disksClient()
  1271  	if err != nil {
  1272  		return errors.Trace(err)
  1273  	}
  1274  	diskPoller, err := disks.BeginDelete(ctx, env.resourceGroup, vmName, nil)
  1275  	if err == nil {
  1276  		_, err = diskPoller.PollUntilDone(ctx, nil)
  1277  	}
  1278  	if err != nil {
  1279  		if errorutils.MaybeInvalidateCredential(err, ctx) || !errorutils.IsNotFoundError(err) {
  1280  			return errors.Annotate(err, "deleting OS disk")
  1281  		}
  1282  	}
  1283  	logger.Debugf("- deleting security rules (%s)", vmName)
  1284  	if err := deleteInstanceNetworkSecurityRules(
  1285  		ctx,
  1286  		env, instId, networkInterfaces,
  1287  	); err != nil {
  1288  		return errors.Annotate(err, "deleting network security rules")
  1289  	}
  1290  
  1291  	logger.Debugf("- deleting network interfaces (%s)", vmName)
  1292  	interfaces, err := env.interfacesClient()
  1293  	if err != nil {
  1294  		return errors.Trace(err)
  1295  	}
  1296  	for _, nic := range networkInterfaces {
  1297  		nicName := toValue(nic.Name)
  1298  		logger.Tracef("deleting NIC %q", nicName)
  1299  		nicPoller, err := interfaces.BeginDelete(ctx, env.resourceGroup, nicName, nil)
  1300  		if err == nil {
  1301  			_, err = nicPoller.PollUntilDone(ctx, nil)
  1302  		}
  1303  		if err != nil {
  1304  			if errorutils.MaybeInvalidateCredential(err, ctx) || !errorutils.IsNotFoundError(err) {
  1305  				return errors.Annotate(err, "deleting NIC")
  1306  			}
  1307  		}
  1308  	}
  1309  
  1310  	logger.Debugf("- deleting public IPs (%s)", vmName)
  1311  	publicAddresses, err := env.publicAddressesClient()
  1312  	if err != nil {
  1313  		return errors.Trace(err)
  1314  	}
  1315  	for _, pip := range publicIPAddresses {
  1316  		pipName := toValue(pip.Name)
  1317  		logger.Tracef("deleting public IP %q", pipName)
  1318  		ipPoller, err := publicAddresses.BeginDelete(ctx, env.resourceGroup, pipName, nil)
  1319  		if err == nil {
  1320  			_, err = ipPoller.PollUntilDone(ctx, nil)
  1321  		}
  1322  		if err != nil {
  1323  			if errorutils.MaybeInvalidateCredential(err, ctx) || !errorutils.IsNotFoundError(err) {
  1324  				return errors.Annotate(err, "deleting public IP")
  1325  			}
  1326  		}
  1327  	}
  1328  
  1329  	// The deployment must be deleted last, or we risk leaking armresources.
  1330  	logger.Debugf("- deleting deployment (%s)", vmName)
  1331  	deploy, err := env.deployClient()
  1332  	if err != nil {
  1333  		return errors.Trace(err)
  1334  	}
  1335  	deploymentPoller, err := deploy.BeginDelete(ctx, env.resourceGroup, vmName, nil)
  1336  	if err == nil {
  1337  		_, err = deploymentPoller.PollUntilDone(ctx, nil)
  1338  	}
  1339  	if err != nil {
  1340  		ignoreError := isDeployConflictError(err) || errorutils.IsNotFoundError(err)
  1341  		if !ignoreError || errorutils.MaybeInvalidateCredential(err, ctx) {
  1342  			return errors.Annotate(err, "deleting deployment")
  1343  		}
  1344  	}
  1345  	return nil
  1346  }
  1347  
  1348  // AdoptResources is part of the Environ interface.
  1349  func (env *azureEnviron) AdoptResources(ctx context.ProviderCallContext, controllerUUID string, _ version.Number) error {
  1350  	resourceGroups, err := env.resourceGroupsClient()
  1351  	if err != nil {
  1352  		return errors.Trace(err)
  1353  	}
  1354  	err = env.updateGroupControllerTag(ctx, resourceGroups, env.resourceGroup, controllerUUID)
  1355  	if err != nil {
  1356  		// If we can't update the group there's no point updating the
  1357  		// contained resources - the group will be killed if the
  1358  		// controller is destroyed, taking the other things with it.
  1359  		return errors.Trace(err)
  1360  	}
  1361  
  1362  	providers, err := env.providersClient()
  1363  	if err != nil {
  1364  		// If we can't update the group there's no point updating the
  1365  		// contained resources - the group will be killed if the
  1366  		// controller is destroyed, taking the other things with it.
  1367  		return errors.Trace(err)
  1368  	}
  1369  	apiVersions, err := collectAPIVersions(ctx, providers)
  1370  	if err != nil {
  1371  		return errors.Trace(err)
  1372  	}
  1373  
  1374  	resources, err := env.resourcesClient()
  1375  	if err != nil {
  1376  		return errors.Trace(err)
  1377  	}
  1378  	var failed []string
  1379  	pager := resources.NewListByResourceGroupPager(env.resourceGroup, nil)
  1380  	for pager.More() {
  1381  		next, err := pager.NextPage(ctx)
  1382  		if err != nil {
  1383  			return errorutils.HandleCredentialError(errors.Annotate(err, "listing resources"), ctx)
  1384  		}
  1385  		for _, res := range next.Value {
  1386  			apiVersion := apiVersions[toValue(res.Type)]
  1387  			err := env.updateResourceControllerTag(
  1388  				ctx,
  1389  				resources,
  1390  				res, controllerUUID, apiVersion,
  1391  			)
  1392  			if err != nil {
  1393  				name := toValue(res.Name)
  1394  				logger.Errorf("error updating resource tags for %q: %v", name, err)
  1395  				failed = append(failed, name)
  1396  			}
  1397  		}
  1398  	}
  1399  	if len(failed) > 0 {
  1400  		return errors.Errorf("failed to update controller for some resources: %v", failed)
  1401  	}
  1402  
  1403  	return nil
  1404  }
  1405  
  1406  func (env *azureEnviron) updateGroupControllerTag(ctx context.ProviderCallContext, client *armresources.ResourceGroupsClient, groupName, controllerUUID string) error {
  1407  	group, err := client.Get(ctx, groupName, nil)
  1408  	if err != nil {
  1409  		return errorutils.HandleCredentialError(errors.Trace(err), ctx)
  1410  	}
  1411  
  1412  	logger.Debugf(
  1413  		"updating resource group %s juju controller uuid to %s",
  1414  		toValue(group.Name), controllerUUID,
  1415  	)
  1416  	group.Tags[tags.JujuController] = to.Ptr(controllerUUID)
  1417  
  1418  	// The Azure API forbids specifying ProvisioningState on the update.
  1419  	if group.Properties != nil {
  1420  		(*group.Properties).ProvisioningState = nil
  1421  	}
  1422  
  1423  	_, err = client.CreateOrUpdate(ctx, groupName, group.ResourceGroup, nil)
  1424  	return errorutils.HandleCredentialError(errors.Annotatef(err, "updating controller for resource group %q", groupName), ctx)
  1425  }
  1426  
  1427  func (env *azureEnviron) updateResourceControllerTag(
  1428  	ctx context.ProviderCallContext,
  1429  	client *armresources.Client,
  1430  	stubResource *armresources.GenericResourceExpanded,
  1431  	controllerUUID string,
  1432  	apiVersion string,
  1433  ) error {
  1434  	stubTags := toMap(stubResource.Tags)
  1435  	if stubTags[tags.JujuController] == controllerUUID {
  1436  		// No update needed.
  1437  		return nil
  1438  	}
  1439  
  1440  	// Need to get the resource individually to ensure that the
  1441  	// properties are populated.
  1442  	resource, err := client.GetByID(ctx, toValue(stubResource.ID), apiVersion, nil)
  1443  	if err != nil {
  1444  		return errorutils.HandleCredentialError(errors.Annotatef(err, "getting full resource %q", toValue(stubResource.Name)), ctx)
  1445  	}
  1446  
  1447  	logger.Debugf("updating %s juju controller UUID to %s", toValue(stubResource.ID), controllerUUID)
  1448  	if resource.Tags == nil {
  1449  		resource.Tags = make(map[string]*string)
  1450  	}
  1451  	resource.Tags[tags.JujuController] = to.Ptr(controllerUUID)
  1452  	_, err = client.BeginCreateOrUpdateByID(
  1453  		ctx,
  1454  		toValue(stubResource.ID),
  1455  		apiVersion,
  1456  		resource.GenericResource,
  1457  		nil,
  1458  	)
  1459  	return errorutils.HandleCredentialError(errors.Annotatef(err, "updating controller for %q", toValue(resource.Name)), ctx)
  1460  }
  1461  
var (
	// runningInstStates lists the provisioning states that
	// AllRunningInstances passes to allInstances as its state filter.
	runningInstStates = []armresources.ProvisioningState{
		armresources.ProvisioningStateCreating,
		armresources.ProvisioningStateUpdating,
		armresources.ProvisioningStateSucceeded,
	}
)
  1469  
  1470  // Instances is specified in the Environ interface.
  1471  func (env *azureEnviron) Instances(ctx context.ProviderCallContext, ids []instance.Id) ([]instances.Instance, error) {
  1472  	if len(ids) == 0 {
  1473  		return nil, nil
  1474  	}
  1475  	insts := make([]instances.Instance, len(ids))
  1476  	// Make a series of requests to cope with eventual consistency.
  1477  	// Each request will attempt to add more instances to the requested set.
  1478  	err := retry.Call(retry.CallArgs{
  1479  		Func: func() error {
  1480  			var need []instance.Id
  1481  			for i, inst := range insts {
  1482  				if inst == nil {
  1483  					need = append(need, ids[i])
  1484  				}
  1485  			}
  1486  			return env.gatherInstances(ctx, need, insts, env.resourceGroup, true)
  1487  		},
  1488  		IsFatalError: func(err error) bool {
  1489  			return err != environs.ErrPartialInstances
  1490  		},
  1491  		Attempts:    -1,
  1492  		Delay:       200 * time.Millisecond,
  1493  		MaxDuration: 5 * time.Second,
  1494  		Clock:       env.provider.config.RetryClock,
  1495  	})
  1496  
  1497  	if err == environs.ErrPartialInstances {
  1498  		for _, inst := range insts {
  1499  			if inst != nil {
  1500  				return insts, environs.ErrPartialInstances
  1501  			}
  1502  		}
  1503  		return nil, environs.ErrNoInstances
  1504  	}
  1505  	if err != nil {
  1506  		return nil, errors.Trace(err)
  1507  	}
  1508  	return insts, nil
  1509  }
  1510  
// AllInstances is specified in the InstanceBroker interface.
//
// It calls allInstances for the environ's resource group, asking for
// addresses to be refreshed and passing an empty controller UUID and no
// provisioning-state filter.
func (env *azureEnviron) AllInstances(ctx context.ProviderCallContext) ([]instances.Instance, error) {
	return env.allInstances(ctx, env.resourceGroup, true, "")
}
  1515  
// AllRunningInstances is specified in the InstanceBroker interface.
//
// It makes the same allInstances call as AllInstances, but restricts the
// result to the provisioning states listed in runningInstStates.
func (env *azureEnviron) AllRunningInstances(ctx context.ProviderCallContext) ([]instances.Instance, error) {
	return env.allInstances(ctx, env.resourceGroup, true, "", runningInstStates...)
}
  1520  
  1521  // gatherInstances tries to get information on each instance id
  1522  // whose corresponding insts slot is nil.
  1523  // This function returns environs.ErrPartialInstances if the
  1524  // insts slice has not been completely filled.
  1525  func (env *azureEnviron) gatherInstances(
  1526  	ctx context.ProviderCallContext,
  1527  	ids []instance.Id,
  1528  	insts []instances.Instance,
  1529  	resourceGroup string,
  1530  	refreshAddresses bool,
  1531  	instStates ...armresources.ProvisioningState,
  1532  ) error {
  1533  	allInst, err := env.allInstances(ctx, resourceGroup, refreshAddresses, "", instStates...)
  1534  	if err != nil {
  1535  		return errors.Trace(err)
  1536  	}
  1537  
  1538  	numFound := 0
  1539  	// For each requested id, add it to the returned instances
  1540  	// if we find it in the latest queried cloud instances.
  1541  	for i, id := range ids {
  1542  		if insts[i] != nil {
  1543  			numFound++
  1544  			continue
  1545  		}
  1546  		for _, inst := range allInst {
  1547  			if inst.Id() != id {
  1548  				continue
  1549  			}
  1550  			insts[i] = inst
  1551  			numFound++
  1552  		}
  1553  	}
  1554  	if numFound < len(ids) {
  1555  		return environs.ErrPartialInstances
  1556  	}
  1557  	return nil
  1558  }
  1559  
  1560  // allInstances returns all instances in the environment
  1561  // with one of the specified instance states.
  1562  // If no instance states are specified, then return all instances.
  1563  func (env *azureEnviron) allInstances(
  1564  	ctx context.ProviderCallContext,
  1565  	resourceGroup string,
  1566  	refreshAddresses bool,
  1567  	controllerUUID string,
  1568  	instStates ...armresources.ProvisioningState,
  1569  ) ([]instances.Instance, error) {
  1570  	// Instances may be queued for deployment but provisioning has not yet started.
  1571  	queued, err := env.allQueuedInstances(ctx, resourceGroup, controllerUUID != "")
  1572  	if err != nil {
  1573  		return nil, errors.Trace(err)
  1574  	}
  1575  	provisioned, err := env.allProvisionedInstances(ctx, resourceGroup, controllerUUID, instStates...)
  1576  	if err != nil {
  1577  		return nil, errors.Trace(err)
  1578  	}
  1579  
  1580  	// Any provisioned or provisioning instances take precedence
  1581  	// over any entries in the queued slice.
  1582  	seenInst := set.NewStrings()
  1583  	azureInstances := provisioned
  1584  	for _, p := range provisioned {
  1585  		seenInst.Add(string(p.Id()))
  1586  	}
  1587  	for _, q := range queued {
  1588  		if seenInst.Contains(string(q.Id())) {
  1589  			continue
  1590  		}
  1591  		azureInstances = append(azureInstances, q)
  1592  	}
  1593  
  1594  	// Get the instance addresses if needed.
  1595  	if len(azureInstances) > 0 && refreshAddresses {
  1596  		if err := env.setInstanceAddresses(
  1597  			ctx,
  1598  			resourceGroup,
  1599  			azureInstances,
  1600  		); err != nil {
  1601  			return nil, errors.Trace(err)
  1602  		}
  1603  	}
  1604  
  1605  	var result []instances.Instance
  1606  	for _, inst := range azureInstances {
  1607  		result = append(result, inst)
  1608  	}
  1609  	sort.Slice(result, func(i, j int) bool {
  1610  		return result[i].Id() < result[j].Id()
  1611  	})
  1612  	return result, nil
  1613  }
  1614  
  1615  // allQueuedInstances returns any pending or failed machine deployments
  1616  // in the given resource group.
  1617  func (env *azureEnviron) allQueuedInstances(
  1618  	ctx context.ProviderCallContext,
  1619  	resourceGroup string,
  1620  	controllerOnly bool,
  1621  ) ([]*azureInstance, error) {
  1622  	deploy, err := env.deployClient()
  1623  	if err != nil {
  1624  		return nil, errors.Trace(err)
  1625  	}
  1626  	var azureInstances []*azureInstance
  1627  	pager := deploy.NewListByResourceGroupPager(resourceGroup, nil)
  1628  	for pager.More() {
  1629  		next, err := pager.NextPage(ctx)
  1630  		if err != nil {
  1631  			if errorutils.IsNotFoundError(err) {
  1632  				// This will occur if the resource group does not
  1633  				// exist, e.g. in a fresh hosted environment.
  1634  				return nil, nil
  1635  			}
  1636  			return nil, errorutils.HandleCredentialError(errors.Trace(err), ctx)
  1637  		}
  1638  		for _, deployment := range next.Value {
  1639  			deployProvisioningState := armresources.ProvisioningStateNotSpecified
  1640  			deployError := "Failed"
  1641  			if deployment.Properties != nil {
  1642  				deployProvisioningState = toValue(deployment.Properties.ProvisioningState)
  1643  				deployError = string(deployProvisioningState)
  1644  				if deployment.Properties.Error != nil {
  1645  					deployError = toValue(deployment.Properties.Error.Message)
  1646  					if deployment.Properties.Error.Details != nil && len(deployment.Properties.Error.Details) > 0 {
  1647  						deployError = toValue((deployment.Properties.Error.Details)[0].Message)
  1648  					}
  1649  				}
  1650  			}
  1651  			switch deployProvisioningState {
  1652  			case armresources.ProvisioningStateAccepted,
  1653  				armresources.ProvisioningStateCreating,
  1654  				armresources.ProvisioningStateRunning,
  1655  				armresources.ProvisioningStateFailed,
  1656  				armresources.ProvisioningStateCanceled,
  1657  				armresources.ProvisioningStateNotSpecified:
  1658  			default:
  1659  				continue
  1660  			}
  1661  			name := toValue(deployment.Name)
  1662  			if _, err := names.ParseMachineTag(name); err != nil {
  1663  				// Deployments we create for Juju machines are named
  1664  				// with the machine tag. We also create a "common"
  1665  				// deployment, so this will exclude that VM and any
  1666  				// other stray deployment armresources.
  1667  				continue
  1668  			}
  1669  			if deployment.Properties == nil || deployment.Properties.Dependencies == nil {
  1670  				continue
  1671  			}
  1672  			if controllerOnly && !isControllerDeployment(deployment) {
  1673  				continue
  1674  			}
  1675  			if len(deployment.Tags) == 0 {
  1676  				continue
  1677  			}
  1678  			if toValue(deployment.Tags[tags.JujuModel]) != env.Config().UUID() {
  1679  				continue
  1680  			}
  1681  			provisioningState := armresources.ProvisioningStateCreating
  1682  			switch deployProvisioningState {
  1683  			case armresources.ProvisioningStateFailed,
  1684  				armresources.ProvisioningStateCanceled:
  1685  				provisioningState = armresources.ProvisioningStateFailed
  1686  			}
  1687  			inst := &azureInstance{
  1688  				vmName:            name,
  1689  				provisioningState: provisioningState,
  1690  				provisioningError: deployError,
  1691  				env:               env,
  1692  			}
  1693  			azureInstances = append(azureInstances, inst)
  1694  		}
  1695  	}
  1696  	return azureInstances, nil
  1697  }
  1698  
  1699  func isControllerDeployment(deployment *armresources.DeploymentExtended) bool {
  1700  	if deployment.Properties == nil {
  1701  		return false
  1702  	}
  1703  	for _, d := range deployment.Properties.Dependencies {
  1704  		if d.DependsOn == nil {
  1705  			continue
  1706  		}
  1707  		if toValue(d.ResourceType) != "Microsoft.Compute/virtualMachines" {
  1708  			continue
  1709  		}
  1710  		for _, on := range d.DependsOn {
  1711  			if toValue(on.ResourceType) != "Microsoft.Compute/availabilitySets" {
  1712  				continue
  1713  			}
  1714  			if toValue(on.ResourceName) == controllerAvailabilitySet {
  1715  				return true
  1716  			}
  1717  		}
  1718  	}
  1719  	return false
  1720  }
  1721  
  1722  // allProvisionedInstances returns all of the instances
  1723  // in the given resource group.
  1724  func (env *azureEnviron) allProvisionedInstances(
  1725  	ctx context.ProviderCallContext,
  1726  	resourceGroup string,
  1727  	controllerUUID string,
  1728  	instStates ...armresources.ProvisioningState,
  1729  ) ([]*azureInstance, error) {
  1730  	compute, err := env.computeClient()
  1731  	if err != nil {
  1732  		return nil, errors.Trace(err)
  1733  	}
  1734  
  1735  	var azureInstances []*azureInstance
  1736  	pager := compute.NewListPager(resourceGroup, nil)
  1737  	for pager.More() {
  1738  		next, err := pager.NextPage(ctx)
  1739  		if err != nil {
  1740  			if errorutils.IsNotFoundError(err) {
  1741  				// This will occur if the resource group does not
  1742  				// exist, e.g. in a fresh hosted environment.
  1743  				return nil, nil
  1744  			}
  1745  			return nil, errorutils.HandleCredentialError(errors.Trace(err), ctx)
  1746  		}
  1747  		for _, vm := range next.Value {
  1748  			name := toValue(vm.Name)
  1749  			provisioningState := armresources.ProvisioningStateNotSpecified
  1750  			if vm.Properties != nil {
  1751  				provisioningState = armresources.ProvisioningState(toValue(vm.Properties.ProvisioningState))
  1752  			}
  1753  			if len(instStates) > 0 {
  1754  				haveState := false
  1755  				for _, wantState := range instStates {
  1756  					if provisioningState == wantState {
  1757  						haveState = true
  1758  						break
  1759  					}
  1760  				}
  1761  				if !haveState {
  1762  					continue
  1763  				}
  1764  			}
  1765  			if !isControllerInstance(vm, controllerUUID) {
  1766  				continue
  1767  			}
  1768  			if len(vm.Tags) == 0 {
  1769  				continue
  1770  			}
  1771  			if toValue(vm.Tags[tags.JujuModel]) != env.Config().UUID() {
  1772  				continue
  1773  			}
  1774  			inst := &azureInstance{
  1775  				vmName:            name,
  1776  				provisioningState: provisioningState,
  1777  				env:               env,
  1778  			}
  1779  			azureInstances = append(azureInstances, inst)
  1780  		}
  1781  	}
  1782  	return azureInstances, nil
  1783  }
  1784  
  1785  func isControllerInstance(vm *armcompute.VirtualMachine, controllerUUID string) bool {
  1786  	if controllerUUID == "" {
  1787  		return true
  1788  	}
  1789  	vmTags := vm.Tags
  1790  	if v, ok := vmTags[tags.JujuIsController]; !ok || toValue(v) != "true" {
  1791  		return false
  1792  	}
  1793  	if v, ok := vmTags[tags.JujuController]; !ok || toValue(v) != controllerUUID {
  1794  		return false
  1795  	}
  1796  	return true
  1797  }
  1798  
  1799  // Destroy is specified in the Environ interface.
  1800  func (env *azureEnviron) Destroy(ctx context.ProviderCallContext) error {
  1801  	logger.Debugf("destroying model %q", env.modelName)
  1802  	logger.Debugf("- deleting resource group %q", env.resourceGroup)
  1803  	if err := env.deleteResourceGroup(ctx, env.resourceGroup); err != nil {
  1804  		return errors.Trace(err)
  1805  	}
  1806  	// Resource groups are self-contained and fully encompass
  1807  	// all environ armresources. Once you delete the group, there
  1808  	// is nothing else to do.
  1809  	return nil
  1810  }
  1811  
  1812  // DestroyController is specified in the Environ interface.
  1813  func (env *azureEnviron) DestroyController(ctx context.ProviderCallContext, controllerUUID string) error {
  1814  	logger.Debugf("destroying model %q", env.modelName)
  1815  	logger.Debugf("deleting resource groups")
  1816  	if err := env.deleteControllerManagedResourceGroups(ctx, controllerUUID); err != nil {
  1817  		return errors.Trace(err)
  1818  	}
  1819  	// Resource groups are self-contained and fully encompass
  1820  	// all environ armresources. Once you delete the group, there
  1821  	// is nothing else to do.
  1822  	return nil
  1823  }
  1824  
  1825  func (env *azureEnviron) deleteControllerManagedResourceGroups(ctx context.ProviderCallContext, controllerUUID string) error {
  1826  	resourceGroups, err := env.resourceGroupsClient()
  1827  	if err != nil {
  1828  		return errors.Trace(err)
  1829  	}
  1830  	filter := fmt.Sprintf(
  1831  		"tagName eq '%s' and tagValue eq '%s'",
  1832  		tags.JujuController, controllerUUID,
  1833  	)
  1834  	pager := resourceGroups.NewListPager(&armresources.ResourceGroupsClientListOptions{
  1835  		Filter: to.Ptr(filter),
  1836  	})
  1837  	var groupNames []*string
  1838  	for pager.More() {
  1839  		next, err := pager.NextPage(ctx)
  1840  		if err != nil {
  1841  			return errorutils.HandleCredentialError(errors.Annotate(err, "listing resource groups"), ctx)
  1842  		}
  1843  		// Walk all the pages of results so we can get a total list of groups to remove.
  1844  		for _, result := range next.Value {
  1845  			groupNames = append(groupNames, result.Name)
  1846  		}
  1847  	}
  1848  	// Deleting groups can take a long time, so make sure they are
  1849  	// deleted in parallel.
  1850  	var wg sync.WaitGroup
  1851  	errs := make([]error, len(groupNames))
  1852  	for i, name := range groupNames {
  1853  		groupName := toValue(name)
  1854  		logger.Debugf("  - deleting resource group %q", groupName)
  1855  		wg.Add(1)
  1856  		go func(i int) {
  1857  			defer wg.Done()
  1858  			if err := env.deleteResourceGroup(ctx, groupName); err != nil {
  1859  				errs[i] = errors.Annotatef(
  1860  					err, "deleting resource group %q", groupName,
  1861  				)
  1862  			}
  1863  		}(i)
  1864  	}
  1865  	wg.Wait()
  1866  
  1867  	// If there is just one error, return it. If there are multiple,
  1868  	// then combine their messages.
  1869  	var nonNilErrs []error
  1870  	for _, err := range errs {
  1871  		if err != nil {
  1872  			nonNilErrs = append(nonNilErrs, err)
  1873  		}
  1874  	}
  1875  	switch len(nonNilErrs) {
  1876  	case 0:
  1877  		return nil
  1878  	case 1:
  1879  		return nonNilErrs[0]
  1880  	}
  1881  	combined := make([]string, len(nonNilErrs))
  1882  	for i, err := range nonNilErrs {
  1883  		combined[i] = err.Error()
  1884  	}
  1885  	return errors.New(strings.Join(combined, "; "))
  1886  }
  1887  
  1888  func (env *azureEnviron) deleteResourceGroup(ctx context.ProviderCallContext, resourceGroup string) error {
  1889  	// For user specified, existing resource groups, delete the contents, not the group.
  1890  	if env.config.resourceGroupName != "" {
  1891  		return env.deleteResourcesInGroup(ctx, resourceGroup)
  1892  	}
  1893  	resourceGroups, err := env.resourceGroupsClient()
  1894  	if err != nil {
  1895  		return errors.Trace(err)
  1896  	}
  1897  	poller, err := resourceGroups.BeginDelete(ctx, resourceGroup, nil)
  1898  	if err == nil {
  1899  		_, err = poller.PollUntilDone(ctx, nil)
  1900  	}
  1901  	if err != nil {
  1902  		if errorutils.MaybeInvalidateCredential(err, ctx) || !errorutils.IsNotFoundError(err) {
  1903  			return errors.Annotatef(err, "deleting resource group %q", resourceGroup)
  1904  		}
  1905  	}
  1906  	return nil
  1907  }
  1908  
  1909  func (env *azureEnviron) deleteResourcesInGroup(ctx context.ProviderCallContext, resourceGroup string) (err error) {
  1910  	logger.Debugf("deleting all resources in %s", resourceGroup)
  1911  
  1912  	defer func() {
  1913  		err = errorutils.HandleCredentialError(err, ctx)
  1914  	}()
  1915  
  1916  	// Find all the resources tagged as belonging to this model.
  1917  	filter := fmt.Sprintf("tagName eq '%s' and tagValue eq '%s'", tags.JujuModel, env.config.UUID())
  1918  	resourceItems, err := env.getModelResources(ctx, resourceGroup, filter)
  1919  	if err != nil {
  1920  		return errors.Trace(err)
  1921  	}
  1922  
  1923  	// Older APIs can ignore the filter above, so query the hard way just in case.
  1924  	if len(resourceItems) == 0 {
  1925  		resourceItems, err = env.getModelResources(ctx, resourceGroup, filter)
  1926  		if err != nil {
  1927  			return errors.Trace(err)
  1928  		}
  1929  	}
  1930  
  1931  	// These will be deleted as part of stopping the instance below.
  1932  	machineResourceTypes := set.NewStrings(
  1933  		"Microsoft.Compute/virtualMachines",
  1934  		"Microsoft.Compute/disks",
  1935  		"Microsoft.Network/publicIPAddresses",
  1936  		"Microsoft.Network/networkInterfaces",
  1937  	)
  1938  
  1939  	var (
  1940  		instIds        []instance.Id
  1941  		vaultNames     []string
  1942  		otherResources []*armresources.GenericResourceExpanded
  1943  	)
  1944  	for _, r := range resourceItems {
  1945  		rType := toValue(r.Type)
  1946  		logger.Debugf("resource to delete: %v (%v)", toValue(r.Name), rType)
  1947  		// Vault resources are handled by a separate client.
  1948  		if rType == "Microsoft.KeyVault/vaults" {
  1949  			vaultNames = append(vaultNames, toValue(r.Name))
  1950  			continue
  1951  		}
  1952  		if rType == "Microsoft.Compute/virtualMachines" {
  1953  			instIds = append(instIds, instance.Id(toValue(r.Name)))
  1954  			continue
  1955  		}
  1956  		if !machineResourceTypes.Contains(rType) {
  1957  			otherResources = append(otherResources, r)
  1958  		}
  1959  	}
  1960  
  1961  	// Stopping instances will also remove most of their dependent armresources.
  1962  	err = env.StopInstances(ctx, instIds...)
  1963  	if err != nil {
  1964  		return errors.Annotatef(err, "deleting machine instances %q", instIds)
  1965  	}
  1966  
  1967  	// Loop until all remaining resources are deleted.
  1968  	// For safety, add an upper retry limit; in reality, this will never be hit.
  1969  	remainingResources := otherResources
  1970  	retries := 0
  1971  	for len(remainingResources) > 0 && retries < 10 {
  1972  		remainingResources, err = env.deleteResources(ctx, remainingResources)
  1973  		if err != nil {
  1974  			return errors.Trace(err)
  1975  		}
  1976  		retries++
  1977  	}
  1978  	if len(remainingResources) > 0 {
  1979  		logger.Warningf("could not delete all Azure resources, remaining: %v", remainingResources)
  1980  	}
  1981  
  1982  	// Lastly delete the vault armresources.
  1983  	for _, vaultName := range vaultNames {
  1984  		if err := env.deleteVault(ctx, vaultName); err != nil {
  1985  			return errors.Trace(err)
  1986  		}
  1987  	}
  1988  	return nil
  1989  }
  1990  
  1991  func (env *azureEnviron) getModelResources(sdkCtx stdcontext.Context, resourceGroup, modelFilter string) ([]*armresources.GenericResourceExpanded, error) {
  1992  	resources, err := env.resourcesClient()
  1993  	if err != nil {
  1994  		return nil, errors.Trace(err)
  1995  	}
  1996  	var resourceItems []*armresources.GenericResourceExpanded
  1997  	pager := resources.NewListByResourceGroupPager(resourceGroup, &armresources.ClientListByResourceGroupOptions{
  1998  		Filter: to.Ptr(modelFilter),
  1999  	})
  2000  	for pager.More() {
  2001  		next, err := pager.NextPage(sdkCtx)
  2002  		if err != nil {
  2003  			return nil, errors.Annotate(err, "listing resources to delete")
  2004  		}
  2005  		for _, res := range next.Value {
  2006  			// If no modelFilter specified, we need to check that the resource
  2007  			// belongs to this model.
  2008  			if modelFilter == "" {
  2009  				fullRes, err := resources.GetByID(sdkCtx, toValue(res.ID), computeAPIVersion, nil)
  2010  				if err != nil {
  2011  					return nil, errors.Trace(err)
  2012  				}
  2013  				if env.config.UUID() != toValue(fullRes.Tags[tags.JujuModel]) {
  2014  					continue
  2015  				}
  2016  			}
  2017  			resourceItems = append(resourceItems, res)
  2018  		}
  2019  	}
  2020  	return resourceItems, nil
  2021  }
  2022  
  2023  // deleteResources deletes the specified resources, returning any that
  2024  // cannot be deleted because they are in use.
  2025  func (env *azureEnviron) deleteResources(sdkCtx stdcontext.Context, toDelete []*armresources.GenericResourceExpanded) ([]*armresources.GenericResourceExpanded, error) {
  2026  	logger.Debugf("deleting %d resources", len(toDelete))
  2027  
  2028  	var remainingResources []*armresources.GenericResourceExpanded
  2029  	var wg sync.WaitGroup
  2030  	deleteResults := make([]error, len(toDelete))
  2031  	for i, res := range toDelete {
  2032  		id := toValue(res.ID)
  2033  		logger.Debugf("- deleting resource %q", id)
  2034  		wg.Add(1)
  2035  		go func(i int, id string) {
  2036  			defer wg.Done()
  2037  			resources, err := env.resourcesClient()
  2038  			if err != nil {
  2039  				deleteResults[i] = err
  2040  				return
  2041  			}
  2042  			poller, err := resources.BeginDeleteByID(sdkCtx, id, computeAPIVersion, nil)
  2043  			if err == nil {
  2044  				_, err = poller.PollUntilDone(sdkCtx, nil)
  2045  			}
  2046  			if err != nil {
  2047  				if errorutils.IsNotFoundError(err) {
  2048  					return
  2049  				}
  2050  				// If the resource is in use, don't error, just queue it up for another pass.
  2051  				if strings.HasPrefix(errorutils.ErrorCode(err), "InUse") {
  2052  					remainingResources = append(remainingResources, toDelete[i])
  2053  				} else {
  2054  					deleteResults[i] = errors.Annotatef(err, "deleting resource %q: %v", id, err)
  2055  				}
  2056  				return
  2057  			}
  2058  		}(i, id)
  2059  	}
  2060  	wg.Wait()
  2061  
  2062  	var errStrings []string
  2063  	for i, err := range deleteResults {
  2064  		if err != nil && !errors.IsNotFound(err) {
  2065  			msg := fmt.Sprintf("error deleting resource %q: %#v", toValue(toDelete[i].ID), err)
  2066  			errStrings = append(errStrings, msg)
  2067  		}
  2068  	}
  2069  	if len(errStrings) > 0 {
  2070  		return nil, errors.Annotate(errors.New(strings.Join(errStrings, "\n")), "deleting resources")
  2071  	}
  2072  	return remainingResources, nil
  2073  }
  2074  
  2075  // Provider is specified in the Environ interface.
  2076  func (env *azureEnviron) Provider() environs.EnvironProvider {
  2077  	return env.provider
  2078  }
  2079  
  2080  // resourceGroupName returns the name of the model's resource group to use.
  2081  // It may be that a legacy group name is already in use, so use that if present.
  2082  func (env *azureEnviron) resourceGroupName(ctx stdcontext.Context, modelTag names.ModelTag, modelName string) (string, error) {
  2083  	resourceGroups, err := env.resourceGroupsClient()
  2084  	if err != nil {
  2085  		return "", errors.Trace(err)
  2086  	}
  2087  	// First look for a resource group name with the full model UUID.
  2088  	legacyName := legacyResourceGroupName(modelTag, modelName)
  2089  	g, err := resourceGroups.Get(ctx, legacyName, nil)
  2090  	if err == nil {
  2091  		logger.Debugf("using existing legacy resource group %q for model %q", legacyName, modelName)
  2092  		return legacyName, nil
  2093  	}
  2094  	if !errorutils.IsNotFoundError(err) {
  2095  		return "", errors.Trace(err)
  2096  	}
  2097  
  2098  	logger.Debugf("legacy resource group name doesn't exist, using short name")
  2099  	resourceGroup := resourceGroupName(modelTag, modelName)
  2100  	g, err = resourceGroups.Get(ctx, resourceGroup, nil)
  2101  	if err == nil {
  2102  		mTag, ok := g.Tags[tags.JujuModel]
  2103  		if !ok || toValue(mTag) != modelTag.Id() {
  2104  			// This should never happen in practice - combination of model name and first 8
  2105  			// digits of UUID should be unique.
  2106  			return "", errors.Errorf("unexpected model UUID on resource group %q; expected %q, got %q", resourceGroup, modelTag.Id(), toValue(mTag))
  2107  		}
  2108  		return resourceGroup, nil
  2109  	}
  2110  	if errorutils.IsNotFoundError(err) {
  2111  		return resourceGroup, nil
  2112  	}
  2113  	return "", errors.Trace(err)
  2114  }
  2115  
  2116  // resourceGroupName returns the name of the environment's resource group.
  2117  func legacyResourceGroupName(modelTag names.ModelTag, modelName string) string {
  2118  	return fmt.Sprintf("juju-%s-%s", modelName, resourceName(modelTag))
  2119  }
  2120  
  2121  // resourceGroupName returns the name of the environment's resource group.
  2122  func resourceGroupName(modelTag names.ModelTag, modelName string) string {
  2123  	// The first chunk of the UUID string plus model name should be good enough.
  2124  	return fmt.Sprintf("juju-%s-%s", modelName, modelTag.Id()[:8])
  2125  }
  2126  
  2127  // resourceName returns the string to use for a resource's Name tag,
  2128  // to help users identify Juju-managed resources in the Azure portal.
  2129  //
  2130  // Since resources are grouped under resource groups, we just use the
  2131  // tag.
  2132  func resourceName(tag names.Tag) string {
  2133  	return tag.String()
  2134  }
  2135  
  2136  // getInstanceTypes gets the instance types available for the configured
  2137  // location, keyed by name.
  2138  func (env *azureEnviron) getInstanceTypes(ctx context.ProviderCallContext) (map[string]instances.InstanceType, error) {
  2139  	env.mu.Lock()
  2140  	defer env.mu.Unlock()
  2141  	instanceTypes, err := env.getInstanceTypesLocked(ctx)
  2142  	if err != nil {
  2143  		return nil, errors.Annotate(err, "getting instance types")
  2144  	}
  2145  	return instanceTypes, nil
  2146  }
  2147  
  2148  // getInstanceTypesLocked returns the instance types for Azure, by listing the
  2149  // role sizes available to the subscription.
  2150  func (env *azureEnviron) getInstanceTypesLocked(ctx context.ProviderCallContext) (map[string]instances.InstanceType, error) {
  2151  	if env.instanceTypes != nil {
  2152  		return env.instanceTypes, nil
  2153  	}
  2154  
  2155  	skus, err := env.resourceSKUsClient()
  2156  	if err != nil {
  2157  		return nil, errors.Trace(err)
  2158  	}
  2159  	instanceTypes := make(map[string]instances.InstanceType)
  2160  	pager := skus.NewListPager(nil)
  2161  	for pager.More() {
  2162  		next, err := pager.NextPage(ctx)
  2163  		if err != nil {
  2164  			return nil, errorutils.HandleCredentialError(errors.Annotate(err, "listing VM sizes"), ctx)
  2165  		}
  2166  	nextResource:
  2167  		for _, resource := range next.Value {
  2168  			if resource.ResourceType == nil || *resource.ResourceType != "virtualMachines" {
  2169  				continue
  2170  			}
  2171  			for _, r := range resource.Restrictions {
  2172  				if toValue(r.ReasonCode) == armcompute.ResourceSKURestrictionsReasonCodeNotAvailableForSubscription {
  2173  					continue nextResource
  2174  				}
  2175  			}
  2176  			locationOk := false
  2177  			if resource.Locations != nil {
  2178  				for _, loc := range resource.Locations {
  2179  					if strings.EqualFold(toValue(loc), env.location) {
  2180  						locationOk = true
  2181  						break
  2182  					}
  2183  				}
  2184  			}
  2185  			if !locationOk {
  2186  				continue
  2187  			}
  2188  			var (
  2189  				cores    *int32
  2190  				mem      *int32
  2191  				rootDisk *int32
  2192  			)
  2193  			for _, capability := range resource.Capabilities {
  2194  				if capability.Name == nil || capability.Value == nil {
  2195  					continue
  2196  				}
  2197  				switch toValue(capability.Name) {
  2198  				case "MemoryGB":
  2199  					memValue, _ := strconv.ParseFloat(*capability.Value, 32)
  2200  					mem = to.Ptr(int32(1024 * memValue))
  2201  				case "vCPUsAvailable", "vCPUs":
  2202  					coresValue, _ := strconv.Atoi(*capability.Value)
  2203  					cores = to.Ptr(int32(coresValue))
  2204  				case "OSVhdSizeMB":
  2205  					rootDiskValue, _ := strconv.Atoi(*capability.Value)
  2206  					rootDisk = to.Ptr(int32(rootDiskValue))
  2207  				}
  2208  			}
  2209  			instanceType := newInstanceType(armcompute.VirtualMachineSize{
  2210  				Name:           resource.Name,
  2211  				NumberOfCores:  cores,
  2212  				OSDiskSizeInMB: rootDisk,
  2213  				MemoryInMB:     mem,
  2214  			})
  2215  			instanceTypes[instanceType.Name] = instanceType
  2216  			// Create aliases for standard role sizes.
  2217  			if strings.HasPrefix(instanceType.Name, "Standard_") {
  2218  				instanceTypes[instanceType.Name[len("Standard_"):]] = instanceType
  2219  			}
  2220  		}
  2221  	}
  2222  	env.instanceTypes = instanceTypes
  2223  	return instanceTypes, nil
  2224  }
  2225  
  2226  // Region is specified in the HasRegion interface.
  2227  func (env *azureEnviron) Region() (simplestreams.CloudSpec, error) {
  2228  	return simplestreams.CloudSpec{
  2229  		Region:   env.cloud.Region,
  2230  		Endpoint: env.cloud.Endpoint,
  2231  	}, nil
  2232  }