github.com/alibaba/sealer@v0.8.6-0.20220430115802-37a2bdaa8173/pkg/infra/aliyun/ali_ecs.go (about)

     1  // Copyright © 2021 Alibaba Group Holding Ltd.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package aliyun
    16  
    17  import (
    18  	"errors"
    19  	"fmt"
    20  	"math/rand"
    21  	"os"
    22  	"strconv"
    23  	"strings"
    24  	"time"
    25  
    26  	"github.com/aliyun/alibaba-cloud-sdk-go/sdk/requests"
    27  	"github.com/aliyun/alibaba-cloud-sdk-go/sdk/responses"
    28  	"github.com/aliyun/alibaba-cloud-sdk-go/services/ecs"
    29  
    30  	"github.com/alibaba/sealer/logger"
    31  	v1 "github.com/alibaba/sealer/types/api/v1"
    32  	"github.com/alibaba/sealer/utils"
    33  )
    34  
    35  type Instance struct {
    36  	CPU              int
    37  	Memory           int
    38  	InstanceID       string
    39  	PrimaryIPAddress string
    40  }
    41  
// EcsManager is an empty struct; its only contents are the commented-out
// config and client field declarations kept below.
type EcsManager struct {
	/*	config Config
		client *ecs.Client*/
}
    46  
// RetryEcsRequest sends request via RetryEcsAction, using the package-level
// TryTimes as the attempt budget, and fills response on success.
// It returns the error reported by RetryEcsAction.
func (a *AliProvider) RetryEcsRequest(request requests.AcsRequest, response responses.AcsResponse) error {
	return a.RetryEcsAction(request, response, TryTimes)
}
    50  
    51  func (a *AliProvider) RetryEcsAction(request requests.AcsRequest, response responses.AcsResponse, tryTimes int) error {
    52  	return utils.Retry(tryTimes, TrySleepTime, func() error {
    53  		err := a.EcsClient.DoAction(request, response)
    54  		if err != nil {
    55  			return err
    56  		}
    57  		return nil
    58  	})
    59  }
    60  
    61  func (a *AliProvider) RetryEcsInstanceType(request requests.AcsRequest, response responses.AcsResponse, instances []string) error {
    62  	for i := 0; i < len(instances); i++ {
    63  		switch req := request.(type) {
    64  		case *ecs.ModifyInstanceSpecRequest:
    65  			req.InstanceType = instances[i]
    66  		case *ecs.RunInstancesRequest:
    67  			req.InstanceType = instances[i]
    68  		}
    69  		err := a.RetryEcsAction(request, response, 4)
    70  		if err == nil {
    71  			logger.Debug("use instance type: %s", instances[i])
    72  			break
    73  		} else if i == len(instances)-1 {
    74  			return fmt.Errorf("failed to get ecs instance type, %v", err)
    75  		}
    76  	}
    77  	return nil
    78  }
    79  
    80  func (a *AliProvider) TryGetInstance(request *ecs.DescribeInstancesRequest, response *ecs.DescribeInstancesResponse, expectCount int) error {
    81  	return utils.Retry(TryTimes, TrySleepTime, func() error {
    82  		err := a.EcsClient.DoAction(request, response)
    83  		var ipList []string
    84  		if err != nil {
    85  			return err
    86  		}
    87  		instances := response.Instances.Instance
    88  		if expectCount == -1 {
    89  			return nil
    90  		}
    91  
    92  		if len(instances) != expectCount {
    93  			return errors.New("the number of instances is not as expected")
    94  		}
    95  		for _, instance := range instances {
    96  			if instance.NetworkInterfaces.NetworkInterface[0].PrimaryIpAddress == "" {
    97  				return errors.New("PrimaryIpAddress cannt nob be nil")
    98  			}
    99  			if len(ipList) != 0 && !utils.NotIn(instance.NetworkInterfaces.NetworkInterface[0].PrimaryIpAddress, ipList) {
   100  				return errors.New("PrimaryIpAddress cannt nob be same")
   101  			}
   102  
   103  			ipList = append(ipList, instance.NetworkInterfaces.NetworkInterface[0].PrimaryIpAddress)
   104  		}
   105  
   106  		return nil
   107  	})
   108  }
   109  
   110  func (a *AliProvider) InputIPlist(instanceRole string) (ipList []string, err error) {
   111  	var hosts *v1.Hosts
   112  	switch instanceRole {
   113  	case Master:
   114  		hosts = &a.Cluster.Spec.Masters
   115  	case Node:
   116  		hosts = &a.Cluster.Spec.Nodes
   117  	}
   118  	if hosts == nil {
   119  		return nil, err
   120  	}
   121  	instances, err := a.GetInstancesInfo(instanceRole, hosts.Count)
   122  	if err != nil {
   123  		return nil, err
   124  	}
   125  	for _, instance := range instances {
   126  		ipList = append(ipList, instance.PrimaryIPAddress)
   127  	}
   128  	return ipList, nil
   129  }
   130  
   131  func (a *AliProvider) CreatePassword() {
   132  	rand.Seed(time.Now().UnixNano())
   133  	digits := Digits
   134  	specials := Specials
   135  	letter := Letter
   136  	all := digits + specials + letter
   137  	length := PasswordLength
   138  	buf := make([]byte, length)
   139  	// #nosec
   140  	buf[0] = digits[rand.Intn(len(digits))]
   141  	// #nosec
   142  	buf[1] = specials[rand.Intn(len(specials))]
   143  	for i := 2; i < length; i++ {
   144  		// #nosec
   145  		buf[i] = all[rand.Intn(len(all))]
   146  	}
   147  	rand.Shuffle(len(buf), func(i, j int) {
   148  		buf[i], buf[j] = buf[j], buf[i]
   149  	})
   150  	a.Cluster.Spec.SSH.Passwd = string(buf)
   151  }
   152  
   153  func (a *AliProvider) GetInstanceStatus(instanceID string) (instanceStatus string, err error) {
   154  	request := ecs.CreateDescribeInstanceStatusRequest()
   155  	request.Scheme = Scheme
   156  	request.InstanceId = &[]string{instanceID}
   157  
   158  	//response, err := d.Client.DescribeInstanceStatus(request)
   159  	response := ecs.CreateDescribeInstanceStatusResponse()
   160  	err = a.RetryEcsRequest(request, response)
   161  	if err != nil {
   162  		return "", fmt.Errorf("get instance status failed %v , error :%v", instanceID, err)
   163  	}
   164  	if len(response.InstanceStatuses.InstanceStatus) == 0 {
   165  		return "", fmt.Errorf("instance list is empty")
   166  	}
   167  	return response.InstanceStatuses.InstanceStatus[0].Status, nil
   168  }
   169  
   170  func (a *AliProvider) PoweroffInstance(instanceID string) error {
   171  	request := ecs.CreateStopInstancesRequest()
   172  	request.Scheme = Scheme
   173  	request.InstanceId = &[]string{instanceID}
   174  
   175  	//_, err := d.Client.StopInstances(request)
   176  	response := ecs.CreateStopInstancesResponse()
   177  	return a.RetryEcsRequest(request, response)
   178  }
   179  
   180  func (a *AliProvider) StartInstance(instanceID string) error {
   181  	request := ecs.CreateStartInstanceRequest()
   182  	request.Scheme = Scheme
   183  	request.InstanceId = instanceID
   184  
   185  	//_, err := d.Client.StartInstance(request)
   186  	response := ecs.CreateStartInstanceResponse()
   187  	return a.RetryEcsRequest(request, response)
   188  }
   189  
   190  func (a *AliProvider) ChangeInstanceType(instanceID, cpu, memory string) error {
   191  	cpuInt, err := strconv.Atoi(cpu)
   192  	if err != nil {
   193  		return err
   194  	}
   195  	memoryFloat, err := strconv.ParseFloat(memory, 64)
   196  	if err != nil {
   197  		return err
   198  	}
   199  	instanceStatus, err := a.GetInstanceStatus(instanceID)
   200  	if err != nil {
   201  		return err
   202  	}
   203  	if instanceStatus != Stopped {
   204  		err = a.PoweroffInstance(instanceID)
   205  		if err != nil {
   206  			return err
   207  		}
   208  	}
   209  	expectInstanceType, err := a.GetAvailableResource(cpuInt, memoryFloat)
   210  	if err != nil {
   211  		return err
   212  	}
   213  
   214  	request := ecs.CreateModifyInstanceSpecRequest()
   215  	request.Scheme = Scheme
   216  	request.InstanceId = instanceID
   217  	//_, err = d.Client.ModifyInstanceSpec(request)
   218  	response := ecs.CreateModifyInstanceSpecResponse()
   219  	err = a.RetryEcsInstanceType(request, response, expectInstanceType)
   220  	if err != nil {
   221  		return err
   222  	}
   223  	return a.StartInstance(instanceID)
   224  }
   225  
   226  func (a *AliProvider) GetInstancesInfo(instancesRole, expectCount string) (instances []Instance, err error) {
   227  	var count int
   228  	tag := make(map[string]string)
   229  	tag[Product] = a.Cluster.Name
   230  	tag[Role] = instancesRole
   231  	if expectCount == "" {
   232  		count = -1
   233  	} else {
   234  		count, _ = strconv.Atoi(expectCount)
   235  	}
   236  	instancesTags := CreateDescribeInstancesTag(tag)
   237  	request := ecs.CreateDescribeInstancesRequest()
   238  	request.Scheme = Scheme
   239  	request.RegionId = a.Config.RegionID
   240  	request.VSwitchId = a.Cluster.Annotations[VSwitchID]
   241  	request.SecurityGroupId = a.Cluster.Annotations[SecurityGroupID]
   242  	request.Tag = &instancesTags
   243  	//response, err := d.Client.DescribeInstances(request)
   244  	response := ecs.CreateDescribeInstancesResponse()
   245  	err = a.TryGetInstance(request, response, count)
   246  	if err != nil {
   247  		return nil, err
   248  	}
   249  
   250  	for _, instance := range response.Instances.Instance {
   251  		instances = append(instances,
   252  			Instance{
   253  				CPU:              instance.Cpu,
   254  				Memory:           instance.Memory / 1024,
   255  				InstanceID:       instance.InstanceId,
   256  				PrimaryIPAddress: instance.NetworkInterfaces.NetworkInterface[0].PrimaryIpAddress})
   257  	}
   258  	return
   259  }
   260  
   261  func (a *AliProvider) ReconcileInstances(instanceRole string) error {
   262  	var hosts *v1.Hosts
   263  	var instances []Instance
   264  	var instancesIDs string
   265  	switch instanceRole {
   266  	case Master:
   267  		hosts = &a.Cluster.Spec.Masters
   268  		instancesIDs = a.Cluster.Annotations[AliMasterIDs]
   269  		if hosts.Count == "" {
   270  			return errors.New("master count not set")
   271  		}
   272  	case Node:
   273  		hosts = &a.Cluster.Spec.Nodes
   274  		instancesIDs = a.Cluster.Annotations[AliNodeIDs]
   275  		if hosts.Count == "" {
   276  			return nil
   277  		}
   278  	}
   279  	if hosts == nil {
   280  		return errors.New("hosts not set")
   281  	}
   282  	i, err := strconv.Atoi(hosts.Count)
   283  	if err != nil {
   284  		return fmt.Errorf("failed to get hosts count, %v", err)
   285  	}
   286  	if instancesIDs != "" {
   287  		instances, err = a.GetInstancesInfo(instanceRole, JustGetInstanceInfo)
   288  	}
   289  
   290  	if err != nil {
   291  		return err
   292  	}
   293  	if len(instances) < i {
   294  		err = a.RunInstances(instanceRole, i-len(instances))
   295  		if err != nil {
   296  			return err
   297  		}
   298  		ipList, err := a.InputIPlist(instanceRole)
   299  		if err != nil {
   300  			return err
   301  		}
   302  		hosts.IPList = utils.AppendDiffSlice(hosts.IPList, ipList)
   303  		logger.Info("get scale up IP list %v, append iplist %v, host count %s", ipList, hosts.IPList, hosts.Count)
   304  	} else if len(instances) > i {
   305  		var deleteInstancesIDs []string
   306  		var count int
   307  		for _, instance := range instances {
   308  			if instance.InstanceID != a.Cluster.Annotations[Master0ID] {
   309  				deleteInstancesIDs = append(deleteInstancesIDs, instance.InstanceID)
   310  				count++
   311  			}
   312  			if count == (len(instances) - i) {
   313  				break
   314  			}
   315  		}
   316  		if len(deleteInstancesIDs) != 0 {
   317  			a.Cluster.Annotations[ShouldBeDeleteInstancesIDs] = strings.Join(deleteInstancesIDs, ",")
   318  			err = a.DeleteInstances()
   319  			if err != nil {
   320  				return err
   321  			}
   322  			a.Cluster.Annotations[ShouldBeDeleteInstancesIDs] = ""
   323  		}
   324  
   325  		ipList, err := a.InputIPlist(instanceRole)
   326  		if err != nil {
   327  			return err
   328  		}
   329  		hosts.IPList = utils.ReduceStrSlice(hosts.IPList, ipList)
   330  	}
   331  
   332  	cpu, err := strconv.Atoi(hosts.CPU)
   333  	if err != nil {
   334  		return fmt.Errorf("failed to get hosts CPU, %v", err)
   335  	}
   336  
   337  	memory, err := strconv.Atoi(hosts.Memory)
   338  	if err != nil {
   339  		return fmt.Errorf("failed to get hosts memory, %v", err)
   340  	}
   341  	for _, instance := range instances {
   342  		if instance.CPU != cpu || instance.Memory != memory {
   343  			err = a.ChangeInstanceType(instance.InstanceID, hosts.CPU, hosts.Memory)
   344  			if err != nil {
   345  				return err
   346  			}
   347  		}
   348  	}
   349  
   350  	logger.Info("reconcile %s instances success %v ", instanceRole, hosts.IPList)
   351  	return nil
   352  }
   353  
   354  func (a *AliProvider) DeleteInstances() error {
   355  	instanceIDs := strings.Split(a.Cluster.Annotations[ShouldBeDeleteInstancesIDs], ",")
   356  	if len(instanceIDs) == 0 {
   357  		return nil
   358  	}
   359  	request := ecs.CreateDeleteInstancesRequest()
   360  	request.Scheme = Scheme
   361  	request.InstanceId = &instanceIDs
   362  	request.Force = requests.NewBoolean(true)
   363  	//_, err := d.Client.DeleteInstances(request)
   364  	response := ecs.CreateDeleteInstancesResponse()
   365  	err := a.RetryEcsRequest(request, response)
   366  	if err != nil {
   367  		return err
   368  	}
   369  	a.Cluster.Annotations[ShouldBeDeleteInstancesIDs] = ""
   370  	return nil
   371  }
   372  
   373  func CreateDescribeInstancesTag(tags map[string]string) (instanceTags []ecs.DescribeInstancesTag) {
   374  	for k, v := range tags {
   375  		instanceTags = append(instanceTags, ecs.DescribeInstancesTag{Key: k, Value: v})
   376  	}
   377  	return
   378  }
   379  
   380  func CreateInstanceDataDisk(dataDisks []string) (instanceDisks []ecs.RunInstancesDataDisk) {
   381  	for _, v := range dataDisks {
   382  		instanceDisks = append(instanceDisks,
   383  			ecs.RunInstancesDataDisk{Size: v, Category: AliCloudEssd})
   384  	}
   385  	return
   386  }
   387  
   388  func (a *AliProvider) GetAvailableResource(cores int, memory float64) (instanceType []string, err error) {
   389  	request := ecs.CreateDescribeAvailableResourceRequest()
   390  	request.Scheme = Scheme
   391  	request.RegionId = a.Config.RegionID
   392  	request.ZoneId = a.Cluster.GetAnnotationsByKey(ZoneID)
   393  	request.DestinationResource = DestinationResource
   394  	request.InstanceChargeType = InstanceChargeType
   395  	request.Cores = requests.NewInteger(cores)
   396  	request.Memory = requests.NewFloat(memory)
   397  
   398  	//response, err := d.Client.DescribeAvailableResource(request)
   399  	response := ecs.CreateDescribeAvailableResourceResponse()
   400  	err = a.RetryEcsRequest(request, response)
   401  	if err != nil {
   402  		return nil, err
   403  	}
   404  
   405  	if len(response.AvailableZones.AvailableZone) < 1 {
   406  		return nil, fmt.Errorf("resources not find")
   407  	}
   408  	for _, f := range response.AvailableZones.AvailableZone[0].AvailableResources.AvailableResource {
   409  		for _, r := range f.SupportedResources.SupportedResource {
   410  			if r.StatusCategory == AvailableTypeStatus {
   411  				instanceType = append(instanceType, r.Value)
   412  			}
   413  		}
   414  	}
   415  	return
   416  }
   417  
   418  func (a *AliProvider) RunInstances(instanceRole string, count int) error {
   419  	var hosts *v1.Hosts
   420  	switch instanceRole {
   421  	case Master:
   422  		hosts = &a.Cluster.Spec.Masters
   423  	case Node:
   424  		hosts = &a.Cluster.Spec.Nodes
   425  	}
   426  	instances := hosts
   427  	if instances == nil {
   428  		return errors.New("host not set")
   429  	}
   430  	instancesCPU, _ := strconv.Atoi(instances.CPU)
   431  	instancesMemory, _ := strconv.ParseFloat(instances.Memory, 64)
   432  	systemDiskSize := instances.SystemDisk
   433  	instanceType, err := a.GetAvailableResource(instancesCPU, instancesMemory)
   434  	if err != nil {
   435  		return err
   436  	}
   437  	tag := make(map[string]string)
   438  	tag[Product] = a.Cluster.Name
   439  	tag[Role] = instanceRole
   440  	instancesTag := CreateInstanceTag(tag)
   441  
   442  	dataDisks := instances.DataDisks
   443  	datadisk := CreateInstanceDataDisk(dataDisks)
   444  
   445  	request := ecs.CreateRunInstancesRequest()
   446  	request.Scheme = Scheme
   447  	request.ImageId = ImageID
   448  	request.Password = a.Cluster.Spec.SSH.Passwd
   449  	request.SecurityGroupId = a.Cluster.GetAnnotationsByKey(SecurityGroupID)
   450  	request.VSwitchId = a.Cluster.GetAnnotationsByKey(VSwitchID)
   451  	request.SystemDiskSize = systemDiskSize
   452  	request.SystemDiskCategory = DataCategory
   453  	request.DataDisk = &datadisk
   454  	request.Amount = requests.NewInteger(count)
   455  	request.Tag = &instancesTag
   456  
   457  	//response, err := d.Client.RunInstances(request)
   458  	response := ecs.CreateRunInstancesResponse()
   459  	err = a.RetryEcsInstanceType(request, response, instanceType)
   460  	if err != nil {
   461  		return err
   462  	}
   463  
   464  	instancesIDs := strings.Join(response.InstanceIdSets.InstanceIdSet, ",")
   465  	switch instanceRole {
   466  	case Master:
   467  		a.Cluster.Annotations[AliMasterIDs] += instancesIDs
   468  	case Node:
   469  		a.Cluster.Annotations[AliNodeIDs] += instancesIDs
   470  	}
   471  
   472  	return nil
   473  }
   474  
   475  func (a *AliProvider) AuthorizeSecurityGroup(securityGroupID, portRange string) bool {
   476  	request := ecs.CreateAuthorizeSecurityGroupRequest()
   477  	request.Scheme = Scheme
   478  	request.SecurityGroupId = securityGroupID
   479  	request.IpProtocol = IPProtocol
   480  	request.PortRange = portRange
   481  	request.SourceCidrIp = SourceCidrIP
   482  	request.Policy = Policy
   483  
   484  	//response, err := d.Client.AuthorizeSecurityGroup(request)
   485  	response := ecs.CreateAuthorizeSecurityGroupResponse()
   486  	err := a.RetryEcsRequest(request, response)
   487  	if err != nil {
   488  		logger.Error("%v", err)
   489  		return false
   490  	}
   491  	return response.BaseResponse.IsSuccess()
   492  }
   493  
   494  func (a *AliProvider) CreateSecurityGroup() error {
   495  	request := ecs.CreateCreateSecurityGroupRequest()
   496  	request.Scheme = Scheme
   497  	request.RegionId = a.Config.RegionID
   498  	request.VpcId = a.Cluster.GetAnnotationsByKey(VpcID)
   499  	//response, err := d.Client.CreateSecurityGroup(request)
   500  	response := ecs.CreateCreateSecurityGroupResponse()
   501  	err := a.RetryEcsRequest(request, response)
   502  	if err != nil {
   503  		return err
   504  	}
   505  
   506  	if !a.AuthorizeSecurityGroup(response.SecurityGroupId, SSHPortRange) {
   507  		return fmt.Errorf("authorize securitygroup ssh port failed")
   508  	}
   509  	if !a.AuthorizeSecurityGroup(response.SecurityGroupId, APIServerPortRange) {
   510  		return fmt.Errorf("authorize securitygroup apiserver port failed")
   511  	}
   512  	a.Cluster.Annotations[SecurityGroupID] = response.SecurityGroupId
   513  	return nil
   514  }
   515  
   516  func (a *AliProvider) DeleteSecurityGroup() error {
   517  	request := ecs.CreateDeleteSecurityGroupRequest()
   518  	request.Scheme = Scheme
   519  	request.SecurityGroupId = a.Cluster.Annotations[SecurityGroupID]
   520  
   521  	//response, err := d.Client.DeleteSecurityGroup(request)
   522  	response := ecs.CreateDeleteSecurityGroupResponse()
   523  	return a.RetryEcsRequest(request, response)
   524  }
   525  
   526  func CreateInstanceTag(tags map[string]string) (instanceTags []ecs.RunInstancesTag) {
   527  	for k, v := range tags {
   528  		instanceTags = append(instanceTags, ecs.RunInstancesTag{Key: k, Value: v})
   529  	}
   530  	return
   531  }
   532  
   533  func LoadConfig(config *Config) error {
   534  	config.AccessKey = os.Getenv(AccessKey)
   535  	config.AccessSecret = os.Getenv(AccessSecret)
   536  	config.RegionID = os.Getenv(RegionID)
   537  	if config.RegionID == "" {
   538  		config.RegionID = DefaultRegionID
   539  	}
   540  	if config.AccessKey == "" || config.AccessSecret == "" || config.RegionID == "" {
   541  		return fmt.Errorf("please set accessKey and accessKeySecret ENV, example: export ACCESSKEYID=xxx export ACCESSKEYSECRET=xxx , how to get AK SK: https://ram.console.aliyun.com/manage/ak")
   542  	}
   543  	return nil
   544  }