github.com/sealerio/sealer@v0.11.1-0.20240507115618-f4f89c5853ae/pkg/infra/aliyun/ali_ecs.go (about)

     1  // Copyright © 2021 Alibaba Group Holding Ltd.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package aliyun
    16  
    17  import (
    18  	"errors"
    19  	"fmt"
    20  	"math/rand"
    21  	"os"
    22  	"strconv"
    23  	"strings"
    24  	"time"
    25  
    26  	"github.com/aliyun/alibaba-cloud-sdk-go/sdk/requests"
    27  	"github.com/aliyun/alibaba-cloud-sdk-go/sdk/responses"
    28  	"github.com/aliyun/alibaba-cloud-sdk-go/services/ecs"
    29  	"github.com/sirupsen/logrus"
    30  
    31  	v1 "github.com/sealerio/sealer/types/api/v1"
    32  	"github.com/sealerio/sealer/utils"
    33  	utilsnet "github.com/sealerio/sealer/utils/net"
    34  	strUtils "github.com/sealerio/sealer/utils/strings"
    35  )
    36  
    37  type Instance struct {
    38  	CPU              int
    39  	Memory           int
    40  	InstanceID       string
    41  	PrimaryIPAddress string
    42  }
    43  
    44  type EcsManager struct {
    45  	/*	config Config
    46  		client *ecs.Client*/
    47  }
    48  
    49  func (a *AliProvider) RetryEcsRequest(request requests.AcsRequest, response responses.AcsResponse) error {
    50  	return a.RetryEcsAction(request, response, TryTimes)
    51  }
    52  
    53  func (a *AliProvider) RetryEcsAction(request requests.AcsRequest, response responses.AcsResponse, tryTimes int) error {
    54  	return utils.Retry(tryTimes, TrySleepTime, func() error {
    55  		return a.EcsClient.DoAction(request, response)
    56  	})
    57  }
    58  
    59  func (a *AliProvider) RetryEcsInstanceType(request requests.AcsRequest, response responses.AcsResponse, instances []string) error {
    60  	for i := 0; i < len(instances); i++ {
    61  		switch req := request.(type) {
    62  		case *ecs.ModifyInstanceSpecRequest:
    63  			req.InstanceType = instances[i]
    64  		case *ecs.RunInstancesRequest:
    65  			req.InstanceType = instances[i]
    66  		}
    67  		err := a.RetryEcsAction(request, response, 4)
    68  		if err == nil {
    69  			logrus.Debugf("use instance type: %s", instances[i])
    70  			break
    71  		} else if i == len(instances)-1 {
    72  			return fmt.Errorf("failed to get ecs instance type, %v", err)
    73  		}
    74  	}
    75  	return nil
    76  }
    77  
    78  func (a *AliProvider) TryGetInstance(request *ecs.DescribeInstancesRequest, response *ecs.DescribeInstancesResponse, expectCount int) error {
    79  	return utils.Retry(TryTimes, TrySleepTime, func() error {
    80  		if err := a.EcsClient.DoAction(request, response); err != nil {
    81  			return err
    82  		}
    83  		var ipList []string
    84  		instances := response.Instances.Instance
    85  		if expectCount == -1 {
    86  			return nil
    87  		}
    88  
    89  		if len(instances) != expectCount {
    90  			return errors.New("the number of instances is not as expected")
    91  		}
    92  		for _, instance := range instances {
    93  			if instance.NetworkInterfaces.NetworkInterface[0].PrimaryIpAddress == "" {
    94  				return errors.New("PrimaryIpAddress cannot nob be nil")
    95  			}
    96  			if len(ipList) != 0 && strUtils.IsInSlice(instance.NetworkInterfaces.NetworkInterface[0].PrimaryIpAddress, ipList) {
    97  				return errors.New("PrimaryIpAddress cannot nob be same")
    98  			}
    99  
   100  			ipList = append(ipList, instance.NetworkInterfaces.NetworkInterface[0].PrimaryIpAddress)
   101  		}
   102  
   103  		return nil
   104  	})
   105  }
   106  
   107  func (a *AliProvider) InputIPlist(instanceRole string) (ipList []string, err error) {
   108  	var hosts *v1.Hosts
   109  	switch instanceRole {
   110  	case Master:
   111  		hosts = &a.Cluster.Spec.Masters
   112  	case Node:
   113  		hosts = &a.Cluster.Spec.Nodes
   114  	}
   115  	if hosts == nil {
   116  		return nil, err
   117  	}
   118  	instances, err := a.GetInstancesInfo(instanceRole, hosts.Count)
   119  	if err != nil {
   120  		return nil, err
   121  	}
   122  	for _, instance := range instances {
   123  		ipList = append(ipList, instance.PrimaryIPAddress)
   124  	}
   125  	return ipList, nil
   126  }
   127  
   128  func (a *AliProvider) CreatePassword() {
   129  	rand.Seed(time.Now().UnixNano())
   130  	digits := Digits
   131  	specials := Specials
   132  	letter := Letter
   133  	all := digits + specials + letter
   134  	length := PasswordLength
   135  	buf := make([]byte, length)
   136  	// #nosec
   137  	buf[0] = digits[rand.Intn(len(digits))]
   138  	// #nosec
   139  	buf[1] = specials[rand.Intn(len(specials))]
   140  	for i := 2; i < length; i++ {
   141  		// #nosec
   142  		buf[i] = all[rand.Intn(len(all))]
   143  	}
   144  	rand.Shuffle(len(buf), func(i, j int) {
   145  		buf[i], buf[j] = buf[j], buf[i]
   146  	})
   147  	a.Cluster.Spec.SSH.Passwd = string(buf)
   148  }
   149  
   150  func (a *AliProvider) GetInstanceStatus(instanceID string) (instanceStatus string, err error) {
   151  	request := ecs.CreateDescribeInstanceStatusRequest()
   152  	request.Scheme = Scheme
   153  	request.InstanceId = &[]string{instanceID}
   154  
   155  	//response, err := d.Client.DescribeInstanceStatus(request)
   156  	response := ecs.CreateDescribeInstanceStatusResponse()
   157  	err = a.RetryEcsRequest(request, response)
   158  	if err != nil {
   159  		return "", fmt.Errorf("get instance status failed %v , error :%v", instanceID, err)
   160  	}
   161  	if len(response.InstanceStatuses.InstanceStatus) == 0 {
   162  		return "", fmt.Errorf("instance list is empty")
   163  	}
   164  	return response.InstanceStatuses.InstanceStatus[0].Status, nil
   165  }
   166  
   167  func (a *AliProvider) PoweroffInstance(instanceID string) error {
   168  	request := ecs.CreateStopInstancesRequest()
   169  	request.Scheme = Scheme
   170  	request.InstanceId = &[]string{instanceID}
   171  
   172  	//_, err := d.Client.StopInstances(request)
   173  	response := ecs.CreateStopInstancesResponse()
   174  	return a.RetryEcsRequest(request, response)
   175  }
   176  
   177  func (a *AliProvider) StartInstance(instanceID string) error {
   178  	request := ecs.CreateStartInstanceRequest()
   179  	request.Scheme = Scheme
   180  	request.InstanceId = instanceID
   181  
   182  	//_, err := d.Client.StartInstance(request)
   183  	response := ecs.CreateStartInstanceResponse()
   184  	return a.RetryEcsRequest(request, response)
   185  }
   186  
   187  func (a *AliProvider) ChangeInstanceType(instanceID, cpu, memory string) error {
   188  	cpuInt, err := strconv.Atoi(cpu)
   189  	if err != nil {
   190  		return err
   191  	}
   192  	memoryFloat, err := strconv.ParseFloat(memory, 64)
   193  	if err != nil {
   194  		return err
   195  	}
   196  	instanceStatus, err := a.GetInstanceStatus(instanceID)
   197  	if err != nil {
   198  		return err
   199  	}
   200  	if instanceStatus != Stopped {
   201  		err = a.PoweroffInstance(instanceID)
   202  		if err != nil {
   203  			return err
   204  		}
   205  	}
   206  	expectInstanceType, err := a.GetAvailableResource(cpuInt, memoryFloat)
   207  	if err != nil {
   208  		return err
   209  	}
   210  
   211  	request := ecs.CreateModifyInstanceSpecRequest()
   212  	request.Scheme = Scheme
   213  	request.InstanceId = instanceID
   214  	//_, err = d.Client.ModifyInstanceSpec(request)
   215  	response := ecs.CreateModifyInstanceSpecResponse()
   216  	err = a.RetryEcsInstanceType(request, response, expectInstanceType)
   217  	if err != nil {
   218  		return err
   219  	}
   220  	return a.StartInstance(instanceID)
   221  }
   222  
   223  func (a *AliProvider) GetInstancesInfo(instancesRole, expectCount string) (instances []Instance, err error) {
   224  	var count int
   225  	tag := make(map[string]string)
   226  	tag[Product] = a.Cluster.Name
   227  	tag[Role] = instancesRole
   228  	if expectCount == "" {
   229  		count = -1
   230  	} else {
   231  		count, _ = strconv.Atoi(expectCount)
   232  	}
   233  	instancesTags := CreateDescribeInstancesTag(tag)
   234  	request := ecs.CreateDescribeInstancesRequest()
   235  	request.Scheme = Scheme
   236  	request.RegionId = a.Config.RegionID
   237  	request.VSwitchId = a.Cluster.Annotations[VSwitchID]
   238  	request.SecurityGroupId = a.Cluster.Annotations[SecurityGroupID]
   239  	request.Tag = &instancesTags
   240  	//response, err := d.Client.DescribeInstances(request)
   241  	response := ecs.CreateDescribeInstancesResponse()
   242  	err = a.TryGetInstance(request, response, count)
   243  	if err != nil {
   244  		return nil, err
   245  	}
   246  
   247  	for _, instance := range response.Instances.Instance {
   248  		instances = append(instances,
   249  			Instance{
   250  				CPU:              instance.Cpu,
   251  				Memory:           instance.Memory / 1024,
   252  				InstanceID:       instance.InstanceId,
   253  				PrimaryIPAddress: instance.NetworkInterfaces.NetworkInterface[0].PrimaryIpAddress})
   254  	}
   255  	return
   256  }
   257  
   258  func (a *AliProvider) ReconcileInstances(instanceRole string) error {
   259  	var hosts *v1.Hosts
   260  	var instances []Instance
   261  	var instancesIDs string
   262  	switch instanceRole {
   263  	case Master:
   264  		hosts = &a.Cluster.Spec.Masters
   265  		instancesIDs = a.Cluster.Annotations[AliMasterIDs]
   266  		if hosts.Count == "" {
   267  			return errors.New("master count not set")
   268  		}
   269  	case Node:
   270  		hosts = &a.Cluster.Spec.Nodes
   271  		instancesIDs = a.Cluster.Annotations[AliNodeIDs]
   272  		if hosts.Count == "" {
   273  			return nil
   274  		}
   275  	}
   276  	if hosts == nil {
   277  		return errors.New("hosts not set")
   278  	}
   279  	i, err := strconv.Atoi(hosts.Count)
   280  	if err != nil {
   281  		return fmt.Errorf("failed to get hosts count, %v", err)
   282  	}
   283  	if instancesIDs != "" {
   284  		instances, err = a.GetInstancesInfo(instanceRole, JustGetInstanceInfo)
   285  	}
   286  
   287  	if err != nil {
   288  		return err
   289  	}
   290  	if len(instances) < i {
   291  		err = a.RunInstances(instanceRole, i-len(instances))
   292  		if err != nil {
   293  			return err
   294  		}
   295  		ipList, err := a.InputIPlist(instanceRole)
   296  		if err != nil {
   297  			return err
   298  		}
   299  		IPStrList := utilsnet.IPsToIPStrs(hosts.IPList)
   300  		hosts.IPList = utilsnet.IPStrsToIPs(strUtils.NewComparator(IPStrList, ipList).GetUnion())
   301  		logrus.Infof("get scale up IP list %v, append iplist %v, host count %s", ipList, hosts.IPList, hosts.Count)
   302  	} else if len(instances) > i {
   303  		var deleteInstancesIDs []string
   304  		var count int
   305  		for _, instance := range instances {
   306  			if instance.InstanceID != a.Cluster.Annotations[Master0ID] {
   307  				deleteInstancesIDs = append(deleteInstancesIDs, instance.InstanceID)
   308  				count++
   309  			}
   310  			if count == (len(instances) - i) {
   311  				break
   312  			}
   313  		}
   314  		if len(deleteInstancesIDs) != 0 {
   315  			a.Cluster.Annotations[ShouldBeDeleteInstancesIDs] = strings.Join(deleteInstancesIDs, ",")
   316  			err = a.DeleteInstances()
   317  			if err != nil {
   318  				return err
   319  			}
   320  			a.Cluster.Annotations[ShouldBeDeleteInstancesIDs] = ""
   321  		}
   322  
   323  		ipList, err := a.InputIPlist(instanceRole)
   324  		if err != nil {
   325  			return err
   326  		}
   327  		IPStrList := utilsnet.IPsToIPStrs(hosts.IPList)
   328  		hosts.IPList = utilsnet.IPStrsToIPs(strUtils.NewComparator(IPStrList, ipList).GetIntersection())
   329  	}
   330  
   331  	cpu, err := strconv.Atoi(hosts.CPU)
   332  	if err != nil {
   333  		return fmt.Errorf("failed to get hosts CPU, %v", err)
   334  	}
   335  
   336  	memory, err := strconv.Atoi(hosts.Memory)
   337  	if err != nil {
   338  		return fmt.Errorf("failed to get hosts memory, %v", err)
   339  	}
   340  	for _, instance := range instances {
   341  		if instance.CPU != cpu || instance.Memory != memory {
   342  			err = a.ChangeInstanceType(instance.InstanceID, hosts.CPU, hosts.Memory)
   343  			if err != nil {
   344  				return err
   345  			}
   346  		}
   347  	}
   348  
   349  	logrus.Infof("reconcile %s instances success %v ", instanceRole, hosts.IPList)
   350  	return nil
   351  }
   352  
   353  func (a *AliProvider) DeleteInstances() error {
   354  	instanceIDs := strings.Split(a.Cluster.Annotations[ShouldBeDeleteInstancesIDs], ",")
   355  	if len(instanceIDs) == 0 {
   356  		return nil
   357  	}
   358  	request := ecs.CreateDeleteInstancesRequest()
   359  	request.Scheme = Scheme
   360  	request.InstanceId = &instanceIDs
   361  	request.Force = requests.NewBoolean(true)
   362  	//_, err := d.Client.DeleteInstances(request)
   363  	response := ecs.CreateDeleteInstancesResponse()
   364  	if err := a.RetryEcsRequest(request, response); err != nil {
   365  		return err
   366  	}
   367  
   368  	a.Cluster.Annotations[ShouldBeDeleteInstancesIDs] = ""
   369  	return nil
   370  }
   371  
   372  func CreateDescribeInstancesTag(tags map[string]string) (instanceTags []ecs.DescribeInstancesTag) {
   373  	for k, v := range tags {
   374  		instanceTags = append(instanceTags, ecs.DescribeInstancesTag{Key: k, Value: v})
   375  	}
   376  	return
   377  }
   378  
   379  func CreateInstanceDataDisk(dataDisks []string) (instanceDisks []ecs.RunInstancesDataDisk) {
   380  	for _, v := range dataDisks {
   381  		instanceDisks = append(instanceDisks,
   382  			ecs.RunInstancesDataDisk{Size: v, Category: AliCloudEssd})
   383  	}
   384  	return
   385  }
   386  
   387  func (a *AliProvider) GetAvailableResource(cores int, memory float64) (instanceType []string, err error) {
   388  	request := ecs.CreateDescribeAvailableResourceRequest()
   389  	request.Scheme = Scheme
   390  	request.RegionId = a.Config.RegionID
   391  	request.ZoneId = a.Cluster.GetAnnotationsByKey(ZoneID)
   392  	request.DestinationResource = DestinationResource
   393  	request.InstanceChargeType = InstanceChargeType
   394  	request.Cores = requests.NewInteger(cores)
   395  	request.Memory = requests.NewFloat(memory)
   396  
   397  	//response, err := d.Client.DescribeAvailableResource(request)
   398  	response := ecs.CreateDescribeAvailableResourceResponse()
   399  	err = a.RetryEcsRequest(request, response)
   400  	if err != nil {
   401  		return nil, err
   402  	}
   403  
   404  	if len(response.AvailableZones.AvailableZone) < 1 {
   405  		return nil, fmt.Errorf("resources not find")
   406  	}
   407  	for _, f := range response.AvailableZones.AvailableZone[0].AvailableResources.AvailableResource {
   408  		for _, r := range f.SupportedResources.SupportedResource {
   409  			if r.StatusCategory == AvailableTypeStatus {
   410  				instanceType = append(instanceType, r.Value)
   411  			}
   412  		}
   413  	}
   414  	return
   415  }
   416  
   417  func (a *AliProvider) RunInstances(instanceRole string, count int) error {
   418  	var hosts *v1.Hosts
   419  	switch instanceRole {
   420  	case Master:
   421  		hosts = &a.Cluster.Spec.Masters
   422  	case Node:
   423  		hosts = &a.Cluster.Spec.Nodes
   424  	}
   425  	instances := hosts
   426  	if instances == nil {
   427  		return errors.New("host not set")
   428  	}
   429  	instancesCPU, _ := strconv.Atoi(instances.CPU)
   430  	instancesMemory, _ := strconv.ParseFloat(instances.Memory, 64)
   431  	systemDiskSize := instances.SystemDisk
   432  	instanceType, err := a.GetAvailableResource(instancesCPU, instancesMemory)
   433  	if err != nil {
   434  		return err
   435  	}
   436  	tag := make(map[string]string)
   437  	tag[Product] = a.Cluster.Name
   438  	tag[Role] = instanceRole
   439  	instancesTag := CreateInstanceTag(tag)
   440  
   441  	dataDisks := instances.DataDisks
   442  	datadisk := CreateInstanceDataDisk(dataDisks)
   443  
   444  	request := ecs.CreateRunInstancesRequest()
   445  	request.Scheme = Scheme
   446  	request.ImageId = ImageID
   447  	request.Password = a.Cluster.Spec.SSH.Passwd
   448  	request.SecurityGroupId = a.Cluster.GetAnnotationsByKey(SecurityGroupID)
   449  	request.VSwitchId = a.Cluster.GetAnnotationsByKey(VSwitchID)
   450  	request.SystemDiskSize = systemDiskSize
   451  	request.SystemDiskCategory = DataCategory
   452  	request.DataDisk = &datadisk
   453  	request.Amount = requests.NewInteger(count)
   454  	request.Tag = &instancesTag
   455  
   456  	//response, err := d.Client.RunInstances(request)
   457  	response := ecs.CreateRunInstancesResponse()
   458  	err = a.RetryEcsInstanceType(request, response, instanceType)
   459  	if err != nil {
   460  		return err
   461  	}
   462  
   463  	instancesIDs := strings.Join(response.InstanceIdSets.InstanceIdSet, ",")
   464  	switch instanceRole {
   465  	case Master:
   466  		a.Cluster.Annotations[AliMasterIDs] += instancesIDs
   467  	case Node:
   468  		a.Cluster.Annotations[AliNodeIDs] += instancesIDs
   469  	}
   470  
   471  	return nil
   472  }
   473  
   474  func (a *AliProvider) AuthorizeSecurityGroup(securityGroupID, portRange string) bool {
   475  	request := ecs.CreateAuthorizeSecurityGroupRequest()
   476  	request.Scheme = Scheme
   477  	request.SecurityGroupId = securityGroupID
   478  	request.IpProtocol = IPProtocol
   479  	request.PortRange = portRange
   480  	request.SourceCidrIp = SourceCidrIP
   481  	request.Policy = Policy
   482  
   483  	//response, err := d.Client.AuthorizeSecurityGroup(request)
   484  	response := ecs.CreateAuthorizeSecurityGroupResponse()
   485  	if err := a.RetryEcsRequest(request, response); err != nil {
   486  		logrus.Errorf("%v", err)
   487  		return false
   488  	}
   489  	return response.BaseResponse.IsSuccess()
   490  }
   491  
   492  func (a *AliProvider) CreateSecurityGroup() error {
   493  	request := ecs.CreateCreateSecurityGroupRequest()
   494  	request.Scheme = Scheme
   495  	request.RegionId = a.Config.RegionID
   496  	request.VpcId = a.Cluster.GetAnnotationsByKey(VpcID)
   497  	//response, err := d.Client.CreateSecurityGroup(request)
   498  	response := ecs.CreateCreateSecurityGroupResponse()
   499  	if err := a.RetryEcsRequest(request, response); err != nil {
   500  		return err
   501  	}
   502  
   503  	if !a.AuthorizeSecurityGroup(response.SecurityGroupId, SSHPortRange) {
   504  		return fmt.Errorf("authorize securitygroup ssh port failed")
   505  	}
   506  	if !a.AuthorizeSecurityGroup(response.SecurityGroupId, APIServerPortRange) {
   507  		return fmt.Errorf("authorize securitygroup apiserver port failed")
   508  	}
   509  	a.Cluster.Annotations[SecurityGroupID] = response.SecurityGroupId
   510  	return nil
   511  }
   512  
   513  func (a *AliProvider) DeleteSecurityGroup() error {
   514  	request := ecs.CreateDeleteSecurityGroupRequest()
   515  	request.Scheme = Scheme
   516  	request.SecurityGroupId = a.Cluster.Annotations[SecurityGroupID]
   517  
   518  	//response, err := d.Client.DeleteSecurityGroup(request)
   519  	response := ecs.CreateDeleteSecurityGroupResponse()
   520  	return a.RetryEcsRequest(request, response)
   521  }
   522  
   523  func CreateInstanceTag(tags map[string]string) (instanceTags []ecs.RunInstancesTag) {
   524  	for k, v := range tags {
   525  		instanceTags = append(instanceTags, ecs.RunInstancesTag{Key: k, Value: v})
   526  	}
   527  	return
   528  }
   529  
   530  func LoadConfig(config *Config) error {
   531  	config.AccessKey = os.Getenv(AccessKey)
   532  	config.AccessSecret = os.Getenv(AccessSecret)
   533  	config.RegionID = os.Getenv(RegionID)
   534  	if config.RegionID == "" {
   535  		config.RegionID = DefaultRegionID
   536  	}
   537  	if config.AccessKey == "" || config.AccessSecret == "" || config.RegionID == "" {
   538  		return fmt.Errorf("please set accessKey and accessKeySecret ENV, example: export ACCESSKEYID=xxx export ACCESSKEYSECRET=xxx , how to get AK SK: https://ram.console.aliyun.com/manage/ak")
   539  	}
   540  	return nil
   541  }