github.com/sealerio/sealer@v0.11.1-0.20240507115618-f4f89c5853ae/pkg/infra/aliyun/ali_ecs.go (about) 1 // Copyright © 2021 Alibaba Group Holding Ltd. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package aliyun 16 17 import ( 18 "errors" 19 "fmt" 20 "math/rand" 21 "os" 22 "strconv" 23 "strings" 24 "time" 25 26 "github.com/aliyun/alibaba-cloud-sdk-go/sdk/requests" 27 "github.com/aliyun/alibaba-cloud-sdk-go/sdk/responses" 28 "github.com/aliyun/alibaba-cloud-sdk-go/services/ecs" 29 "github.com/sirupsen/logrus" 30 31 v1 "github.com/sealerio/sealer/types/api/v1" 32 "github.com/sealerio/sealer/utils" 33 utilsnet "github.com/sealerio/sealer/utils/net" 34 strUtils "github.com/sealerio/sealer/utils/strings" 35 ) 36 37 type Instance struct { 38 CPU int 39 Memory int 40 InstanceID string 41 PrimaryIPAddress string 42 } 43 44 type EcsManager struct { 45 /* config Config 46 client *ecs.Client*/ 47 } 48 49 func (a *AliProvider) RetryEcsRequest(request requests.AcsRequest, response responses.AcsResponse) error { 50 return a.RetryEcsAction(request, response, TryTimes) 51 } 52 53 func (a *AliProvider) RetryEcsAction(request requests.AcsRequest, response responses.AcsResponse, tryTimes int) error { 54 return utils.Retry(tryTimes, TrySleepTime, func() error { 55 return a.EcsClient.DoAction(request, response) 56 }) 57 } 58 59 func (a *AliProvider) RetryEcsInstanceType(request requests.AcsRequest, response responses.AcsResponse, instances []string) error { 60 for i := 0; i < len(instances); i++ { 61 switch req := request.(type) { 62 case *ecs.ModifyInstanceSpecRequest: 63 req.InstanceType = instances[i] 64 case *ecs.RunInstancesRequest: 65 req.InstanceType = instances[i] 66 } 67 err := a.RetryEcsAction(request, response, 4) 68 if err == nil { 69 logrus.Debugf("use instance type: %s", instances[i]) 70 break 71 } else if i == len(instances)-1 { 72 return fmt.Errorf("failed to get ecs instance type, %v", err) 73 } 74 } 75 return nil 76 } 77 78 func (a *AliProvider) TryGetInstance(request *ecs.DescribeInstancesRequest, response *ecs.DescribeInstancesResponse, expectCount int) error { 79 return utils.Retry(TryTimes, TrySleepTime, func() error { 80 if err := a.EcsClient.DoAction(request, response); err != nil { 81 return err 82 } 83 var ipList []string 84 instances := response.Instances.Instance 85 if expectCount == -1 { 86 return nil 87 } 88 89 if len(instances) != expectCount { 90 return errors.New("the number of instances is not as expected") 91 } 92 for _, instance := range instances { 93 if instance.NetworkInterfaces.NetworkInterface[0].PrimaryIpAddress == "" { 94 return errors.New("PrimaryIpAddress cannot nob be nil") 95 } 96 if len(ipList) != 0 && strUtils.IsInSlice(instance.NetworkInterfaces.NetworkInterface[0].PrimaryIpAddress, ipList) { 97 return errors.New("PrimaryIpAddress cannot nob be same") 98 } 99 100 ipList = append(ipList, instance.NetworkInterfaces.NetworkInterface[0].PrimaryIpAddress) 101 } 102 103 return nil 104 }) 105 } 106 107 func (a *AliProvider) InputIPlist(instanceRole string) (ipList []string, err error) { 108 var hosts *v1.Hosts 109 switch instanceRole { 110 case Master: 111 hosts = &a.Cluster.Spec.Masters 112 case Node: 113 hosts = &a.Cluster.Spec.Nodes 114 } 115 if hosts == nil { 116 return nil, err 117 } 118 instances, err := a.GetInstancesInfo(instanceRole, hosts.Count) 119 if err != nil { 120 return nil, err 121 } 122 for _, instance := range instances { 123 ipList = append(ipList, instance.PrimaryIPAddress) 124 } 125 return ipList, nil 126 } 127 128 func (a *AliProvider) CreatePassword() { 129 rand.Seed(time.Now().UnixNano()) 130 digits := Digits 131 specials := Specials 132 letter := Letter 133 all := digits + specials + letter 134 length := PasswordLength 135 buf := make([]byte, length) 136 // #nosec 137 buf[0] = digits[rand.Intn(len(digits))] 138 // #nosec 139 buf[1] = specials[rand.Intn(len(specials))] 140 for i := 2; i < length; i++ { 141 // #nosec 142 buf[i] = all[rand.Intn(len(all))] 143 } 144 rand.Shuffle(len(buf), func(i, j int) { 145 buf[i], buf[j] = buf[j], buf[i] 146 }) 147 a.Cluster.Spec.SSH.Passwd = string(buf) 148 } 149 150 func (a *AliProvider) GetInstanceStatus(instanceID string) (instanceStatus string, err error) { 151 request := ecs.CreateDescribeInstanceStatusRequest() 152 request.Scheme = Scheme 153 request.InstanceId = &[]string{instanceID} 154 155 //response, err := d.Client.DescribeInstanceStatus(request) 156 response := ecs.CreateDescribeInstanceStatusResponse() 157 err = a.RetryEcsRequest(request, response) 158 if err != nil { 159 return "", fmt.Errorf("get instance status failed %v , error :%v", instanceID, err) 160 } 161 if len(response.InstanceStatuses.InstanceStatus) == 0 { 162 return "", fmt.Errorf("instance list is empty") 163 } 164 return response.InstanceStatuses.InstanceStatus[0].Status, nil 165 } 166 167 func (a *AliProvider) PoweroffInstance(instanceID string) error { 168 request := ecs.CreateStopInstancesRequest() 169 request.Scheme = Scheme 170 request.InstanceId = &[]string{instanceID} 171 172 //_, err := d.Client.StopInstances(request) 173 response := ecs.CreateStopInstancesResponse() 174 return a.RetryEcsRequest(request, response) 175 } 176 177 func (a *AliProvider) StartInstance(instanceID string) error { 178 request := ecs.CreateStartInstanceRequest() 179 request.Scheme = Scheme 180 request.InstanceId = instanceID 181 182 //_, err := d.Client.StartInstance(request) 183 response := ecs.CreateStartInstanceResponse() 184 return a.RetryEcsRequest(request, response) 185 } 186 187 func (a *AliProvider) ChangeInstanceType(instanceID, cpu, memory string) error { 188 cpuInt, err := strconv.Atoi(cpu) 189 if err != nil { 190 return err 191 } 192 memoryFloat, err := strconv.ParseFloat(memory, 64) 193 if err != nil { 194 return err 195 } 196 instanceStatus, err := a.GetInstanceStatus(instanceID) 197 if err != nil { 198 return err 199 } 200 if instanceStatus != Stopped { 201 err = a.PoweroffInstance(instanceID) 202 if err != nil { 203 return err 204 } 205 } 206 expectInstanceType, err := a.GetAvailableResource(cpuInt, memoryFloat) 207 if err != nil { 208 return err 209 } 210 211 request := ecs.CreateModifyInstanceSpecRequest() 212 request.Scheme = Scheme 213 request.InstanceId = instanceID 214 //_, err = d.Client.ModifyInstanceSpec(request) 215 response := ecs.CreateModifyInstanceSpecResponse() 216 err = a.RetryEcsInstanceType(request, response, expectInstanceType) 217 if err != nil { 218 return err 219 } 220 return a.StartInstance(instanceID) 221 } 222 223 func (a *AliProvider) GetInstancesInfo(instancesRole, expectCount string) (instances []Instance, err error) { 224 var count int 225 tag := make(map[string]string) 226 tag[Product] = a.Cluster.Name 227 tag[Role] = instancesRole 228 if expectCount == "" { 229 count = -1 230 } else { 231 count, _ = strconv.Atoi(expectCount) 232 } 233 instancesTags := CreateDescribeInstancesTag(tag) 234 request := ecs.CreateDescribeInstancesRequest() 235 request.Scheme = Scheme 236 request.RegionId = a.Config.RegionID 237 request.VSwitchId = a.Cluster.Annotations[VSwitchID] 238 request.SecurityGroupId = a.Cluster.Annotations[SecurityGroupID] 239 request.Tag = &instancesTags 240 //response, err := d.Client.DescribeInstances(request) 241 response := ecs.CreateDescribeInstancesResponse() 242 err = a.TryGetInstance(request, response, count) 243 if err != nil { 244 return nil, err 245 } 246 247 for _, instance := range response.Instances.Instance { 248 instances = append(instances, 249 Instance{ 250 CPU: instance.Cpu, 251 Memory: instance.Memory / 1024, 252 InstanceID: instance.InstanceId, 253 PrimaryIPAddress: instance.NetworkInterfaces.NetworkInterface[0].PrimaryIpAddress}) 254 } 255 return 256 } 257 258 func (a *AliProvider) ReconcileInstances(instanceRole string) error { 259 var hosts *v1.Hosts 260 var instances []Instance 261 var instancesIDs string 262 switch instanceRole { 263 case Master: 264 hosts = &a.Cluster.Spec.Masters 265 instancesIDs = a.Cluster.Annotations[AliMasterIDs] 266 if hosts.Count == "" { 267 return errors.New("master count not set") 268 } 269 case Node: 270 hosts = &a.Cluster.Spec.Nodes 271 instancesIDs = a.Cluster.Annotations[AliNodeIDs] 272 if hosts.Count == "" { 273 return nil 274 } 275 } 276 if hosts == nil { 277 return errors.New("hosts not set") 278 } 279 i, err := strconv.Atoi(hosts.Count) 280 if err != nil { 281 return fmt.Errorf("failed to get hosts count, %v", err) 282 } 283 if instancesIDs != "" { 284 instances, err = a.GetInstancesInfo(instanceRole, JustGetInstanceInfo) 285 } 286 287 if err != nil { 288 return err 289 } 290 if len(instances) < i { 291 err = a.RunInstances(instanceRole, i-len(instances)) 292 if err != nil { 293 return err 294 } 295 ipList, err := a.InputIPlist(instanceRole) 296 if err != nil { 297 return err 298 } 299 IPStrList := utilsnet.IPsToIPStrs(hosts.IPList) 300 hosts.IPList = utilsnet.IPStrsToIPs(strUtils.NewComparator(IPStrList, ipList).GetUnion()) 301 logrus.Infof("get scale up IP list %v, append iplist %v, host count %s", ipList, hosts.IPList, hosts.Count) 302 } else if len(instances) > i { 303 var deleteInstancesIDs []string 304 var count int 305 for _, instance := range instances { 306 if instance.InstanceID != a.Cluster.Annotations[Master0ID] { 307 deleteInstancesIDs = append(deleteInstancesIDs, instance.InstanceID) 308 count++ 309 } 310 if count == (len(instances) - i) { 311 break 312 } 313 } 314 if len(deleteInstancesIDs) != 0 { 315 a.Cluster.Annotations[ShouldBeDeleteInstancesIDs] = strings.Join(deleteInstancesIDs, ",") 316 err = a.DeleteInstances() 317 if err != nil { 318 return err 319 } 320 a.Cluster.Annotations[ShouldBeDeleteInstancesIDs] = "" 321 } 322 323 ipList, err := a.InputIPlist(instanceRole) 324 if err != nil { 325 return err 326 } 327 IPStrList := utilsnet.IPsToIPStrs(hosts.IPList) 328 hosts.IPList = utilsnet.IPStrsToIPs(strUtils.NewComparator(IPStrList, ipList).GetIntersection()) 329 } 330 331 cpu, err := strconv.Atoi(hosts.CPU) 332 if err != nil { 333 return fmt.Errorf("failed to get hosts CPU, %v", err) 334 } 335 336 memory, err := strconv.Atoi(hosts.Memory) 337 if err != nil { 338 return fmt.Errorf("failed to get hosts memory, %v", err) 339 } 340 for _, instance := range instances { 341 if instance.CPU != cpu || instance.Memory != memory { 342 err = a.ChangeInstanceType(instance.InstanceID, hosts.CPU, hosts.Memory) 343 if err != nil { 344 return err 345 } 346 } 347 } 348 349 logrus.Infof("reconcile %s instances success %v ", instanceRole, hosts.IPList) 350 return nil 351 } 352 353 func (a *AliProvider) DeleteInstances() error { 354 instanceIDs := strings.Split(a.Cluster.Annotations[ShouldBeDeleteInstancesIDs], ",") 355 if len(instanceIDs) == 0 { 356 return nil 357 } 358 request := ecs.CreateDeleteInstancesRequest() 359 request.Scheme = Scheme 360 request.InstanceId = &instanceIDs 361 request.Force = requests.NewBoolean(true) 362 //_, err := d.Client.DeleteInstances(request) 363 response := ecs.CreateDeleteInstancesResponse() 364 if err := a.RetryEcsRequest(request, response); err != nil { 365 return err 366 } 367 368 a.Cluster.Annotations[ShouldBeDeleteInstancesIDs] = "" 369 return nil 370 } 371 372 func CreateDescribeInstancesTag(tags map[string]string) (instanceTags []ecs.DescribeInstancesTag) { 373 for k, v := range tags { 374 instanceTags = append(instanceTags, ecs.DescribeInstancesTag{Key: k, Value: v}) 375 } 376 return 377 } 378 379 func CreateInstanceDataDisk(dataDisks []string) (instanceDisks []ecs.RunInstancesDataDisk) { 380 for _, v := range dataDisks { 381 instanceDisks = append(instanceDisks, 382 ecs.RunInstancesDataDisk{Size: v, Category: AliCloudEssd}) 383 } 384 return 385 } 386 387 func (a *AliProvider) GetAvailableResource(cores int, memory float64) (instanceType []string, err error) { 388 request := ecs.CreateDescribeAvailableResourceRequest() 389 request.Scheme = Scheme 390 request.RegionId = a.Config.RegionID 391 request.ZoneId = a.Cluster.GetAnnotationsByKey(ZoneID) 392 request.DestinationResource = DestinationResource 393 request.InstanceChargeType = InstanceChargeType 394 request.Cores = requests.NewInteger(cores) 395 request.Memory = requests.NewFloat(memory) 396 397 //response, err := d.Client.DescribeAvailableResource(request) 398 response := ecs.CreateDescribeAvailableResourceResponse() 399 err = a.RetryEcsRequest(request, response) 400 if err != nil { 401 return nil, err 402 } 403 404 if len(response.AvailableZones.AvailableZone) < 1 { 405 return nil, fmt.Errorf("resources not find") 406 } 407 for _, f := range response.AvailableZones.AvailableZone[0].AvailableResources.AvailableResource { 408 for _, r := range f.SupportedResources.SupportedResource { 409 if r.StatusCategory == AvailableTypeStatus { 410 instanceType = append(instanceType, r.Value) 411 } 412 } 413 } 414 return 415 } 416 417 func (a *AliProvider) RunInstances(instanceRole string, count int) error { 418 var hosts *v1.Hosts 419 switch instanceRole { 420 case Master: 421 hosts = &a.Cluster.Spec.Masters 422 case Node: 423 hosts = &a.Cluster.Spec.Nodes 424 } 425 instances := hosts 426 if instances == nil { 427 return errors.New("host not set") 428 } 429 instancesCPU, _ := strconv.Atoi(instances.CPU) 430 instancesMemory, _ := strconv.ParseFloat(instances.Memory, 64) 431 systemDiskSize := instances.SystemDisk 432 instanceType, err := a.GetAvailableResource(instancesCPU, instancesMemory) 433 if err != nil { 434 return err 435 } 436 tag := make(map[string]string) 437 tag[Product] = a.Cluster.Name 438 tag[Role] = instanceRole 439 instancesTag := CreateInstanceTag(tag) 440 441 dataDisks := instances.DataDisks 442 datadisk := CreateInstanceDataDisk(dataDisks) 443 444 request := ecs.CreateRunInstancesRequest() 445 request.Scheme = Scheme 446 request.ImageId = ImageID 447 request.Password = a.Cluster.Spec.SSH.Passwd 448 request.SecurityGroupId = a.Cluster.GetAnnotationsByKey(SecurityGroupID) 449 request.VSwitchId = a.Cluster.GetAnnotationsByKey(VSwitchID) 450 request.SystemDiskSize = systemDiskSize 451 request.SystemDiskCategory = DataCategory 452 request.DataDisk = &datadisk 453 request.Amount = requests.NewInteger(count) 454 request.Tag = &instancesTag 455 456 //response, err := d.Client.RunInstances(request) 457 response := ecs.CreateRunInstancesResponse() 458 err = a.RetryEcsInstanceType(request, response, instanceType) 459 if err != nil { 460 return err 461 } 462 463 instancesIDs := strings.Join(response.InstanceIdSets.InstanceIdSet, ",") 464 switch instanceRole { 465 case Master: 466 a.Cluster.Annotations[AliMasterIDs] += instancesIDs 467 case Node: 468 a.Cluster.Annotations[AliNodeIDs] += instancesIDs 469 } 470 471 return nil 472 } 473 474 func (a *AliProvider) AuthorizeSecurityGroup(securityGroupID, portRange string) bool { 475 request := ecs.CreateAuthorizeSecurityGroupRequest() 476 request.Scheme = Scheme 477 request.SecurityGroupId = securityGroupID 478 request.IpProtocol = IPProtocol 479 request.PortRange = portRange 480 request.SourceCidrIp = SourceCidrIP 481 request.Policy = Policy 482 483 //response, err := d.Client.AuthorizeSecurityGroup(request) 484 response := ecs.CreateAuthorizeSecurityGroupResponse() 485 if err := a.RetryEcsRequest(request, response); err != nil { 486 logrus.Errorf("%v", err) 487 return false 488 } 489 return response.BaseResponse.IsSuccess() 490 } 491 492 func (a *AliProvider) CreateSecurityGroup() error { 493 request := ecs.CreateCreateSecurityGroupRequest() 494 request.Scheme = Scheme 495 request.RegionId = a.Config.RegionID 496 request.VpcId = a.Cluster.GetAnnotationsByKey(VpcID) 497 //response, err := d.Client.CreateSecurityGroup(request) 498 response := ecs.CreateCreateSecurityGroupResponse() 499 if err := a.RetryEcsRequest(request, response); err != nil { 500 return err 501 } 502 503 if !a.AuthorizeSecurityGroup(response.SecurityGroupId, SSHPortRange) { 504 return fmt.Errorf("authorize securitygroup ssh port failed") 505 } 506 if !a.AuthorizeSecurityGroup(response.SecurityGroupId, APIServerPortRange) { 507 return fmt.Errorf("authorize securitygroup apiserver port failed") 508 } 509 a.Cluster.Annotations[SecurityGroupID] = response.SecurityGroupId 510 return nil 511 } 512 513 func (a *AliProvider) DeleteSecurityGroup() error { 514 request := ecs.CreateDeleteSecurityGroupRequest() 515 request.Scheme = Scheme 516 request.SecurityGroupId = a.Cluster.Annotations[SecurityGroupID] 517 518 //response, err := d.Client.DeleteSecurityGroup(request) 519 response := ecs.CreateDeleteSecurityGroupResponse() 520 return a.RetryEcsRequest(request, response) 521 } 522 523 func CreateInstanceTag(tags map[string]string) (instanceTags []ecs.RunInstancesTag) { 524 for k, v := range tags { 525 instanceTags = append(instanceTags, ecs.RunInstancesTag{Key: k, Value: v}) 526 } 527 return 528 } 529 530 func LoadConfig(config *Config) error { 531 config.AccessKey = os.Getenv(AccessKey) 532 config.AccessSecret = os.Getenv(AccessSecret) 533 config.RegionID = os.Getenv(RegionID) 534 if config.RegionID == "" { 535 config.RegionID = DefaultRegionID 536 } 537 if config.AccessKey == "" || config.AccessSecret == "" || config.RegionID == "" { 538 return fmt.Errorf("please set accessKey and accessKeySecret ENV, example: export ACCESSKEYID=xxx export ACCESSKEYSECRET=xxx , how to get AK SK: https://ram.console.aliyun.com/manage/ak") 539 } 540 return nil 541 }