github.com/alibaba/sealer@v0.8.6-0.20220430115802-37a2bdaa8173/pkg/infra/aliyun/ali_ecs.go (about) 1 // Copyright © 2021 Alibaba Group Holding Ltd. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package aliyun 16 17 import ( 18 "errors" 19 "fmt" 20 "math/rand" 21 "os" 22 "strconv" 23 "strings" 24 "time" 25 26 "github.com/aliyun/alibaba-cloud-sdk-go/sdk/requests" 27 "github.com/aliyun/alibaba-cloud-sdk-go/sdk/responses" 28 "github.com/aliyun/alibaba-cloud-sdk-go/services/ecs" 29 30 "github.com/alibaba/sealer/logger" 31 v1 "github.com/alibaba/sealer/types/api/v1" 32 "github.com/alibaba/sealer/utils" 33 ) 34 35 type Instance struct { 36 CPU int 37 Memory int 38 InstanceID string 39 PrimaryIPAddress string 40 } 41 42 type EcsManager struct { 43 /* config Config 44 client *ecs.Client*/ 45 } 46 47 func (a *AliProvider) RetryEcsRequest(request requests.AcsRequest, response responses.AcsResponse) error { 48 return a.RetryEcsAction(request, response, TryTimes) 49 } 50 51 func (a *AliProvider) RetryEcsAction(request requests.AcsRequest, response responses.AcsResponse, tryTimes int) error { 52 return utils.Retry(tryTimes, TrySleepTime, func() error { 53 err := a.EcsClient.DoAction(request, response) 54 if err != nil { 55 return err 56 } 57 return nil 58 }) 59 } 60 61 func (a *AliProvider) RetryEcsInstanceType(request requests.AcsRequest, response responses.AcsResponse, instances []string) error { 62 for i := 0; i < len(instances); i++ { 63 switch req := request.(type) { 64 case *ecs.ModifyInstanceSpecRequest: 65 req.InstanceType = instances[i] 66 case *ecs.RunInstancesRequest: 67 req.InstanceType = instances[i] 68 } 69 err := a.RetryEcsAction(request, response, 4) 70 if err == nil { 71 logger.Debug("use instance type: %s", instances[i]) 72 break 73 } else if i == len(instances)-1 { 74 return fmt.Errorf("failed to get ecs instance type, %v", err) 75 } 76 } 77 return nil 78 } 79 80 func (a *AliProvider) TryGetInstance(request *ecs.DescribeInstancesRequest, response *ecs.DescribeInstancesResponse, expectCount int) error { 81 return utils.Retry(TryTimes, TrySleepTime, func() error { 82 err := a.EcsClient.DoAction(request, response) 83 var ipList []string 84 if err != nil { 85 return err 86 } 87 instances := response.Instances.Instance 88 if expectCount == -1 { 89 return nil 90 } 91 92 if len(instances) != expectCount { 93 return errors.New("the number of instances is not as expected") 94 } 95 for _, instance := range instances { 96 if instance.NetworkInterfaces.NetworkInterface[0].PrimaryIpAddress == "" { 97 return errors.New("PrimaryIpAddress cannt nob be nil") 98 } 99 if len(ipList) != 0 && !utils.NotIn(instance.NetworkInterfaces.NetworkInterface[0].PrimaryIpAddress, ipList) { 100 return errors.New("PrimaryIpAddress cannt nob be same") 101 } 102 103 ipList = append(ipList, instance.NetworkInterfaces.NetworkInterface[0].PrimaryIpAddress) 104 } 105 106 return nil 107 }) 108 } 109 110 func (a *AliProvider) InputIPlist(instanceRole string) (ipList []string, err error) { 111 var hosts *v1.Hosts 112 switch instanceRole { 113 case Master: 114 hosts = &a.Cluster.Spec.Masters 115 case Node: 116 hosts = &a.Cluster.Spec.Nodes 117 } 118 if hosts == nil { 119 return nil, err 120 } 121 instances, err := a.GetInstancesInfo(instanceRole, hosts.Count) 122 if err != nil { 123 return nil, err 124 } 125 for _, instance := range instances { 126 ipList = append(ipList, instance.PrimaryIPAddress) 127 } 128 return ipList, nil 129 } 130 131 func (a *AliProvider) CreatePassword() { 132 rand.Seed(time.Now().UnixNano()) 133 digits := Digits 134 specials := Specials 135 letter := Letter 136 all := digits + specials + letter 137 length := PasswordLength 138 buf := make([]byte, length) 139 // #nosec 140 buf[0] = digits[rand.Intn(len(digits))] 141 // #nosec 142 buf[1] = specials[rand.Intn(len(specials))] 143 for i := 2; i < length; i++ { 144 // #nosec 145 buf[i] = all[rand.Intn(len(all))] 146 } 147 rand.Shuffle(len(buf), func(i, j int) { 148 buf[i], buf[j] = buf[j], buf[i] 149 }) 150 a.Cluster.Spec.SSH.Passwd = string(buf) 151 } 152 153 func (a *AliProvider) GetInstanceStatus(instanceID string) (instanceStatus string, err error) { 154 request := ecs.CreateDescribeInstanceStatusRequest() 155 request.Scheme = Scheme 156 request.InstanceId = &[]string{instanceID} 157 158 //response, err := d.Client.DescribeInstanceStatus(request) 159 response := ecs.CreateDescribeInstanceStatusResponse() 160 err = a.RetryEcsRequest(request, response) 161 if err != nil { 162 return "", fmt.Errorf("get instance status failed %v , error :%v", instanceID, err) 163 } 164 if len(response.InstanceStatuses.InstanceStatus) == 0 { 165 return "", fmt.Errorf("instance list is empty") 166 } 167 return response.InstanceStatuses.InstanceStatus[0].Status, nil 168 } 169 170 func (a *AliProvider) PoweroffInstance(instanceID string) error { 171 request := ecs.CreateStopInstancesRequest() 172 request.Scheme = Scheme 173 request.InstanceId = &[]string{instanceID} 174 175 //_, err := d.Client.StopInstances(request) 176 response := ecs.CreateStopInstancesResponse() 177 return a.RetryEcsRequest(request, response) 178 } 179 180 func (a *AliProvider) StartInstance(instanceID string) error { 181 request := ecs.CreateStartInstanceRequest() 182 request.Scheme = Scheme 183 request.InstanceId = instanceID 184 185 //_, err := d.Client.StartInstance(request) 186 response := ecs.CreateStartInstanceResponse() 187 return a.RetryEcsRequest(request, response) 188 } 189 190 func (a *AliProvider) ChangeInstanceType(instanceID, cpu, memory string) error { 191 cpuInt, err := strconv.Atoi(cpu) 192 if err != nil { 193 return err 194 } 195 memoryFloat, err := strconv.ParseFloat(memory, 64) 196 if err != nil { 197 return err 198 } 199 instanceStatus, err := a.GetInstanceStatus(instanceID) 200 if err != nil { 201 return err 202 } 203 if instanceStatus != Stopped { 204 err = a.PoweroffInstance(instanceID) 205 if err != nil { 206 return err 207 } 208 } 209 expectInstanceType, err := a.GetAvailableResource(cpuInt, memoryFloat) 210 if err != nil { 211 return err 212 } 213 214 request := ecs.CreateModifyInstanceSpecRequest() 215 request.Scheme = Scheme 216 request.InstanceId = instanceID 217 //_, err = d.Client.ModifyInstanceSpec(request) 218 response := ecs.CreateModifyInstanceSpecResponse() 219 err = a.RetryEcsInstanceType(request, response, expectInstanceType) 220 if err != nil { 221 return err 222 } 223 return a.StartInstance(instanceID) 224 } 225 226 func (a *AliProvider) GetInstancesInfo(instancesRole, expectCount string) (instances []Instance, err error) { 227 var count int 228 tag := make(map[string]string) 229 tag[Product] = a.Cluster.Name 230 tag[Role] = instancesRole 231 if expectCount == "" { 232 count = -1 233 } else { 234 count, _ = strconv.Atoi(expectCount) 235 } 236 instancesTags := CreateDescribeInstancesTag(tag) 237 request := ecs.CreateDescribeInstancesRequest() 238 request.Scheme = Scheme 239 request.RegionId = a.Config.RegionID 240 request.VSwitchId = a.Cluster.Annotations[VSwitchID] 241 request.SecurityGroupId = a.Cluster.Annotations[SecurityGroupID] 242 request.Tag = &instancesTags 243 //response, err := d.Client.DescribeInstances(request) 244 response := ecs.CreateDescribeInstancesResponse() 245 err = a.TryGetInstance(request, response, count) 246 if err != nil { 247 return nil, err 248 } 249 250 for _, instance := range response.Instances.Instance { 251 instances = append(instances, 252 Instance{ 253 CPU: instance.Cpu, 254 Memory: instance.Memory / 1024, 255 InstanceID: instance.InstanceId, 256 PrimaryIPAddress: instance.NetworkInterfaces.NetworkInterface[0].PrimaryIpAddress}) 257 } 258 return 259 } 260 261 func (a *AliProvider) ReconcileInstances(instanceRole string) error { 262 var hosts *v1.Hosts 263 var instances []Instance 264 var instancesIDs string 265 switch instanceRole { 266 case Master: 267 hosts = &a.Cluster.Spec.Masters 268 instancesIDs = a.Cluster.Annotations[AliMasterIDs] 269 if hosts.Count == "" { 270 return errors.New("master count not set") 271 } 272 case Node: 273 hosts = &a.Cluster.Spec.Nodes 274 instancesIDs = a.Cluster.Annotations[AliNodeIDs] 275 if hosts.Count == "" { 276 return nil 277 } 278 } 279 if hosts == nil { 280 return errors.New("hosts not set") 281 } 282 i, err := strconv.Atoi(hosts.Count) 283 if err != nil { 284 return fmt.Errorf("failed to get hosts count, %v", err) 285 } 286 if instancesIDs != "" { 287 instances, err = a.GetInstancesInfo(instanceRole, JustGetInstanceInfo) 288 } 289 290 if err != nil { 291 return err 292 } 293 if len(instances) < i { 294 err = a.RunInstances(instanceRole, i-len(instances)) 295 if err != nil { 296 return err 297 } 298 ipList, err := a.InputIPlist(instanceRole) 299 if err != nil { 300 return err 301 } 302 hosts.IPList = utils.AppendDiffSlice(hosts.IPList, ipList) 303 logger.Info("get scale up IP list %v, append iplist %v, host count %s", ipList, hosts.IPList, hosts.Count) 304 } else if len(instances) > i { 305 var deleteInstancesIDs []string 306 var count int 307 for _, instance := range instances { 308 if instance.InstanceID != a.Cluster.Annotations[Master0ID] { 309 deleteInstancesIDs = append(deleteInstancesIDs, instance.InstanceID) 310 count++ 311 } 312 if count == (len(instances) - i) { 313 break 314 } 315 } 316 if len(deleteInstancesIDs) != 0 { 317 a.Cluster.Annotations[ShouldBeDeleteInstancesIDs] = strings.Join(deleteInstancesIDs, ",") 318 err = a.DeleteInstances() 319 if err != nil { 320 return err 321 } 322 a.Cluster.Annotations[ShouldBeDeleteInstancesIDs] = "" 323 } 324 325 ipList, err := a.InputIPlist(instanceRole) 326 if err != nil { 327 return err 328 } 329 hosts.IPList = utils.ReduceStrSlice(hosts.IPList, ipList) 330 } 331 332 cpu, err := strconv.Atoi(hosts.CPU) 333 if err != nil { 334 return fmt.Errorf("failed to get hosts CPU, %v", err) 335 } 336 337 memory, err := strconv.Atoi(hosts.Memory) 338 if err != nil { 339 return fmt.Errorf("failed to get hosts memory, %v", err) 340 } 341 for _, instance := range instances { 342 if instance.CPU != cpu || instance.Memory != memory { 343 err = a.ChangeInstanceType(instance.InstanceID, hosts.CPU, hosts.Memory) 344 if err != nil { 345 return err 346 } 347 } 348 } 349 350 logger.Info("reconcile %s instances success %v ", instanceRole, hosts.IPList) 351 return nil 352 } 353 354 func (a *AliProvider) DeleteInstances() error { 355 instanceIDs := strings.Split(a.Cluster.Annotations[ShouldBeDeleteInstancesIDs], ",") 356 if len(instanceIDs) == 0 { 357 return nil 358 } 359 request := ecs.CreateDeleteInstancesRequest() 360 request.Scheme = Scheme 361 request.InstanceId = &instanceIDs 362 request.Force = requests.NewBoolean(true) 363 //_, err := d.Client.DeleteInstances(request) 364 response := ecs.CreateDeleteInstancesResponse() 365 err := a.RetryEcsRequest(request, response) 366 if err != nil { 367 return err 368 } 369 a.Cluster.Annotations[ShouldBeDeleteInstancesIDs] = "" 370 return nil 371 } 372 373 func CreateDescribeInstancesTag(tags map[string]string) (instanceTags []ecs.DescribeInstancesTag) { 374 for k, v := range tags { 375 instanceTags = append(instanceTags, ecs.DescribeInstancesTag{Key: k, Value: v}) 376 } 377 return 378 } 379 380 func CreateInstanceDataDisk(dataDisks []string) (instanceDisks []ecs.RunInstancesDataDisk) { 381 for _, v := range dataDisks { 382 instanceDisks = append(instanceDisks, 383 ecs.RunInstancesDataDisk{Size: v, Category: AliCloudEssd}) 384 } 385 return 386 } 387 388 func (a *AliProvider) GetAvailableResource(cores int, memory float64) (instanceType []string, err error) { 389 request := ecs.CreateDescribeAvailableResourceRequest() 390 request.Scheme = Scheme 391 request.RegionId = a.Config.RegionID 392 request.ZoneId = a.Cluster.GetAnnotationsByKey(ZoneID) 393 request.DestinationResource = DestinationResource 394 request.InstanceChargeType = InstanceChargeType 395 request.Cores = requests.NewInteger(cores) 396 request.Memory = requests.NewFloat(memory) 397 398 //response, err := d.Client.DescribeAvailableResource(request) 399 response := ecs.CreateDescribeAvailableResourceResponse() 400 err = a.RetryEcsRequest(request, response) 401 if err != nil { 402 return nil, err 403 } 404 405 if len(response.AvailableZones.AvailableZone) < 1 { 406 return nil, fmt.Errorf("resources not find") 407 } 408 for _, f := range response.AvailableZones.AvailableZone[0].AvailableResources.AvailableResource { 409 for _, r := range f.SupportedResources.SupportedResource { 410 if r.StatusCategory == AvailableTypeStatus { 411 instanceType = append(instanceType, r.Value) 412 } 413 } 414 } 415 return 416 } 417 418 func (a *AliProvider) RunInstances(instanceRole string, count int) error { 419 var hosts *v1.Hosts 420 switch instanceRole { 421 case Master: 422 hosts = &a.Cluster.Spec.Masters 423 case Node: 424 hosts = &a.Cluster.Spec.Nodes 425 } 426 instances := hosts 427 if instances == nil { 428 return errors.New("host not set") 429 } 430 instancesCPU, _ := strconv.Atoi(instances.CPU) 431 instancesMemory, _ := strconv.ParseFloat(instances.Memory, 64) 432 systemDiskSize := instances.SystemDisk 433 instanceType, err := a.GetAvailableResource(instancesCPU, instancesMemory) 434 if err != nil { 435 return err 436 } 437 tag := make(map[string]string) 438 tag[Product] = a.Cluster.Name 439 tag[Role] = instanceRole 440 instancesTag := CreateInstanceTag(tag) 441 442 dataDisks := instances.DataDisks 443 datadisk := CreateInstanceDataDisk(dataDisks) 444 445 request := ecs.CreateRunInstancesRequest() 446 request.Scheme = Scheme 447 request.ImageId = ImageID 448 request.Password = a.Cluster.Spec.SSH.Passwd 449 request.SecurityGroupId = a.Cluster.GetAnnotationsByKey(SecurityGroupID) 450 request.VSwitchId = a.Cluster.GetAnnotationsByKey(VSwitchID) 451 request.SystemDiskSize = systemDiskSize 452 request.SystemDiskCategory = DataCategory 453 request.DataDisk = &datadisk 454 request.Amount = requests.NewInteger(count) 455 request.Tag = &instancesTag 456 457 //response, err := d.Client.RunInstances(request) 458 response := ecs.CreateRunInstancesResponse() 459 err = a.RetryEcsInstanceType(request, response, instanceType) 460 if err != nil { 461 return err 462 } 463 464 instancesIDs := strings.Join(response.InstanceIdSets.InstanceIdSet, ",") 465 switch instanceRole { 466 case Master: 467 a.Cluster.Annotations[AliMasterIDs] += instancesIDs 468 case Node: 469 a.Cluster.Annotations[AliNodeIDs] += instancesIDs 470 } 471 472 return nil 473 } 474 475 func (a *AliProvider) AuthorizeSecurityGroup(securityGroupID, portRange string) bool { 476 request := ecs.CreateAuthorizeSecurityGroupRequest() 477 request.Scheme = Scheme 478 request.SecurityGroupId = securityGroupID 479 request.IpProtocol = IPProtocol 480 request.PortRange = portRange 481 request.SourceCidrIp = SourceCidrIP 482 request.Policy = Policy 483 484 //response, err := d.Client.AuthorizeSecurityGroup(request) 485 response := ecs.CreateAuthorizeSecurityGroupResponse() 486 err := a.RetryEcsRequest(request, response) 487 if err != nil { 488 logger.Error("%v", err) 489 return false 490 } 491 return response.BaseResponse.IsSuccess() 492 } 493 494 func (a *AliProvider) CreateSecurityGroup() error { 495 request := ecs.CreateCreateSecurityGroupRequest() 496 request.Scheme = Scheme 497 request.RegionId = a.Config.RegionID 498 request.VpcId = a.Cluster.GetAnnotationsByKey(VpcID) 499 //response, err := d.Client.CreateSecurityGroup(request) 500 response := ecs.CreateCreateSecurityGroupResponse() 501 err := a.RetryEcsRequest(request, response) 502 if err != nil { 503 return err 504 } 505 506 if !a.AuthorizeSecurityGroup(response.SecurityGroupId, SSHPortRange) { 507 return fmt.Errorf("authorize securitygroup ssh port failed") 508 } 509 if !a.AuthorizeSecurityGroup(response.SecurityGroupId, APIServerPortRange) { 510 return fmt.Errorf("authorize securitygroup apiserver port failed") 511 } 512 a.Cluster.Annotations[SecurityGroupID] = response.SecurityGroupId 513 return nil 514 } 515 516 func (a *AliProvider) DeleteSecurityGroup() error { 517 request := ecs.CreateDeleteSecurityGroupRequest() 518 request.Scheme = Scheme 519 request.SecurityGroupId = a.Cluster.Annotations[SecurityGroupID] 520 521 //response, err := d.Client.DeleteSecurityGroup(request) 522 response := ecs.CreateDeleteSecurityGroupResponse() 523 return a.RetryEcsRequest(request, response) 524 } 525 526 func CreateInstanceTag(tags map[string]string) (instanceTags []ecs.RunInstancesTag) { 527 for k, v := range tags { 528 instanceTags = append(instanceTags, ecs.RunInstancesTag{Key: k, Value: v}) 529 } 530 return 531 } 532 533 func LoadConfig(config *Config) error { 534 config.AccessKey = os.Getenv(AccessKey) 535 config.AccessSecret = os.Getenv(AccessSecret) 536 config.RegionID = os.Getenv(RegionID) 537 if config.RegionID == "" { 538 config.RegionID = DefaultRegionID 539 } 540 if config.AccessKey == "" || config.AccessSecret == "" || config.RegionID == "" { 541 return fmt.Errorf("please set accessKey and accessKeySecret ENV, example: export ACCESSKEYID=xxx export ACCESSKEYSECRET=xxx , how to get AK SK: https://ram.console.aliyun.com/manage/ak") 542 } 543 return nil 544 }