github.com/jenkins-x/jx/v2@v2.1.155/pkg/cmd/step/e2e/step_e2e_gc.go (about)

     1  package e2e
     2  
     3  import (
     4  	"fmt"
     5  	"os"
     6  	"strconv"
     7  	"strings"
     8  	"time"
     9  
    10  	"github.com/jenkins-x/jx/v2/pkg/cluster"
    11  
    12  	"github.com/pkg/errors"
    13  
    14  	"github.com/jenkins-x/jx/v2/pkg/cluster/factory"
    15  
    16  	"github.com/jenkins-x/jx-logging/pkg/log"
    17  	"github.com/jenkins-x/jx/v2/pkg/cloud"
    18  	"github.com/jenkins-x/jx/v2/pkg/cloud/gke"
    19  	"github.com/jenkins-x/jx/v2/pkg/cmd/deletecmd"
    20  	"github.com/jenkins-x/jx/v2/pkg/cmd/gc"
    21  	"github.com/jenkins-x/jx/v2/pkg/cmd/get"
    22  	"github.com/jenkins-x/jx/v2/pkg/cmd/helper"
    23  	"github.com/jenkins-x/jx/v2/pkg/cmd/opts"
    24  	"github.com/jenkins-x/jx/v2/pkg/cmd/opts/step"
    25  	"github.com/jenkins-x/jx/v2/pkg/cmd/templates"
    26  	"github.com/spf13/cobra"
    27  )
    28  
// StepE2EGCOptions contains the command line flags
// of the `jx step e2e gc` command together with the embedded step options.
type StepE2EGCOptions struct {
	step.StepOptions
	ProjectID string   // --project-id: Google project ID to delete clusters from
	Providers []string // --providers: providers to run the cleanup for
	Region    string   // --region: GKE region used when listing and deleting clusters
	Duration  int      // --duration: age threshold in hours used by the age-based check
}
    37  
var (
	// stepE2EGCLong is the text assigned to the Long field of the gc cobra command.
	stepE2EGCLong = templates.LongDesc(`
		This pipeline step removes stale E2E test clusters
`)

	// stepE2EGCExample is the text assigned to the Example field of the gc cobra command.
	stepE2EGCExample = templates.Examples(`
		# delete stale E2E test clusters
		jx step e2e gc

`)
)
    49  
    50  // NewCmdStepE2EGC creates the CLI command
    51  func NewCmdStepE2EGC(commonOpts *opts.CommonOptions) *cobra.Command {
    52  	options := StepE2EGCOptions{
    53  		StepOptions: step.StepOptions{
    54  			CommonOptions: commonOpts,
    55  		},
    56  	}
    57  	cmd := &cobra.Command{
    58  		Use:     "gc",
    59  		Short:   "Removes unused e2e clusters",
    60  		Aliases: []string{},
    61  		Long:    stepE2EGCLong,
    62  		Example: stepE2EGCExample,
    63  		Run: func(cmd *cobra.Command, args []string) {
    64  			options.Cmd = cmd
    65  			options.Args = args
    66  			err := options.Run()
    67  			helper.CheckErr(err)
    68  		},
    69  	}
    70  	cmd.Flags().StringVarP(&options.Region, "region", "", "europe-west1-c", "GKE region to use. Default: europe-west1-c")
    71  	cmd.Flags().StringVarP(&options.ProjectID, "project-id", "p", "", "Google Project ID to delete cluster from")
    72  	cmd.Flags().IntVarP(&options.Duration, "duration", "d", 2, "How many hours old a cluster should be before it is deleted if it does not have a --delete tag")
    73  	cmd.Flags().StringArrayVarP(&options.Providers, "providers", "", []string{"gke"}, "The providers to run the cleanup for")
    74  
    75  	return cmd
    76  }
    77  
    78  // Run runs the command
    79  func (o *StepE2EGCOptions) Run() error {
    80  	// Until https://github.com/jenkins-x/jx/issues/6206 is done, we are going to be using different approaches to run this for different providers
    81  	for _, pr := range o.Providers {
    82  		switch strings.ToLower(pr) {
    83  		case cloud.GKE:
    84  			return o.gcpGarbageCollection()
    85  		case cloud.AWS:
    86  			fallthrough
    87  		case cloud.EKS:
    88  			return o.eksGarbageCollection()
    89  		default:
    90  			return fmt.Errorf("provider %s doesn't have an E2E GC implementation defined", pr)
    91  		}
    92  	}
    93  	return nil
    94  }
    95  
    96  func (o *StepE2EGCOptions) eksGarbageCollection() error {
    97  	eksClient, err := factory.NewClientForProvider(cloud.EKS)
    98  	if err != nil {
    99  		return errors.Wrap(err, "could not obtain an EKS cluster client to ")
   100  	}
   101  	eksClusters, err := eksClient.List()
   102  	if err != nil {
   103  		return errors.Wrap(err, "there was a problem obtaining every eksClient in the current account")
   104  	}
   105  
   106  	for _, eksCluster := range eksClusters {
   107  		if eksCluster.Status == "ACTIVE" {
   108  			if !o.ShouldDeleteMarkedEKSCluster(eksCluster) {
   109  				if !o.ShouldDeleteOlderThanDurationEKS(eksCluster) {
   110  					if o.ShouldDeleteDueToNewerRunEKS(eksCluster, eksClusters) {
   111  						err = o.deleteEksCluster(eksCluster, eksClient)
   112  					}
   113  				} else {
   114  					err = o.deleteEksCluster(eksCluster, eksClient)
   115  				}
   116  			} else {
   117  				err = o.deleteEksCluster(eksCluster, eksClient)
   118  			}
   119  		}
   120  		if err != nil {
   121  			log.Logger().Errorf("error deleting cluster %s: %s", eksCluster.Name, err.Error())
   122  		}
   123  	}
   124  	return nil
   125  }
   126  
   127  func (o *StepE2EGCOptions) gcpGarbageCollection() error {
   128  	err := o.InstallRequirements(cloud.GKE)
   129  	if err != nil {
   130  		return err
   131  	}
   132  	gkeSa := os.Getenv("GKE_SA_KEY_FILE")
   133  	if gkeSa != "" {
   134  		err = o.GCloud().Login(gkeSa, true)
   135  		if err != nil {
   136  			return err
   137  		}
   138  	}
   139  
   140  	clusters, err := o.GCloud().ListClusters(o.Region, o.ProjectID)
   141  	if err != nil {
   142  		return err
   143  	}
   144  
   145  	for _, cluster := range clusters {
   146  		c := cluster
   147  		if c.Status == "RUNNING" {
   148  			// Marked for deletion
   149  			if !o.ShouldDeleteMarkedCluster(&c) {
   150  				// Older than duration in hours
   151  				if !o.ShouldDeleteOlderThanDuration(&c) {
   152  					// Delete build that has been replaced by a newer version
   153  					if o.ShouldDeleteDueToNewerRun(&c, clusters) {
   154  						o.deleteGkeCluster(&c)
   155  					}
   156  				} else {
   157  					o.deleteGkeCluster(&c)
   158  				}
   159  			} else {
   160  				o.deleteGkeCluster(&c)
   161  			}
   162  		}
   163  	}
   164  	gkeGCOpts := gc.GCGKEOptions{
   165  		CommonOptions: &opts.CommonOptions{},
   166  	}
   167  	gkeGCOpts.Err = o.Err
   168  	gkeGCOpts.Out = o.Out
   169  	gkeGCOpts.Flags.ProjectID = o.ProjectID
   170  	gkeGCOpts.Flags.RunNow = true
   171  	return gkeGCOpts.Run()
   172  }
   173  
   174  // GetBuildNumberFromClusterEKS gets the build number from the cluster labels
   175  func (o *StepE2EGCOptions) GetBuildNumberFromClusterEKS(cluster *cluster.Cluster) (int, error) {
   176  	if branch, ok := cluster.Labels["branch"]; ok {
   177  		if clusterType, ok := cluster.Labels["cluster"]; ok {
   178  			buildNumStr := strings.Replace(strings.Replace(cluster.Name, branch+"-", "", -1), "-"+clusterType, "", -1)
   179  			return strconv.Atoi(buildNumStr)
   180  		}
   181  	}
   182  	return 0, fmt.Errorf("finding build number for cluster " + cluster.Name)
   183  }
   184  
   185  // GetBuildNumberFromCluster gets the build number from the cluster labels
   186  func (o *StepE2EGCOptions) GetBuildNumberFromCluster(cluster *gke.Cluster) (int, error) {
   187  	if branch, ok := cluster.ResourceLabels["branch"]; ok {
   188  		if clusterType, ok := cluster.ResourceLabels["cluster"]; ok {
   189  			buildNumStr := strings.Replace(strings.Replace(cluster.Name, branch+"-", "", -1), "-"+clusterType, "", -1)
   190  			return strconv.Atoi(buildNumStr)
   191  		}
   192  	}
   193  	return 0, fmt.Errorf("finding build number for cluster " + cluster.Name)
   194  }
   195  
   196  // ShouldDeleteMarkedCluster returns true if the cluster has a delete label
   197  func (o *StepE2EGCOptions) ShouldDeleteMarkedCluster(cluster *gke.Cluster) bool {
   198  	if deleteLabel, ok := cluster.ResourceLabels["delete-me"]; ok {
   199  		if deleteLabel == "true" {
   200  			return true
   201  		}
   202  	}
   203  	return false
   204  }
   205  
   206  // ShouldDeleteMarkedEKSCluster returns true if the cluster has a delete label
   207  func (o *StepE2EGCOptions) ShouldDeleteMarkedEKSCluster(cluster *cluster.Cluster) bool {
   208  	if deleteLabel, ok := cluster.Labels["delete-me"]; ok {
   209  		if deleteLabel == "true" {
   210  			return true
   211  		}
   212  	}
   213  	return false
   214  }
   215  
   216  // ShouldDeleteOlderThanDurationEKS returns true if the cluster is older than the delete duration and does not have a keep label
   217  func (o *StepE2EGCOptions) ShouldDeleteOlderThanDurationEKS(cluster *cluster.Cluster) bool {
   218  	if createdTime, ok := cluster.Labels["create-time"]; ok {
   219  		createdDate, err := time.Parse("Mon-Jan-2-2006-15-04-05", createdTime)
   220  		if err != nil {
   221  			log.Logger().Errorf("Error parsing date for cluster %s", createdTime)
   222  			log.Logger().Error(err)
   223  		} else {
   224  			ttlExceededDate := createdDate.Add(time.Duration(o.Duration) * time.Hour)
   225  			now := time.Now().UTC()
   226  			if now.After(ttlExceededDate) {
   227  				if _, ok := cluster.Labels["keep-me"]; !ok {
   228  					return true
   229  				}
   230  			}
   231  		}
   232  	}
   233  	return false
   234  }
   235  
   236  // ShouldDeleteOlderThanDuration returns true if the cluster is older than the delete duration and does not have a keep label
   237  func (o *StepE2EGCOptions) ShouldDeleteOlderThanDuration(cluster *gke.Cluster) bool {
   238  	if createdTime, ok := cluster.ResourceLabels["create-time"]; ok {
   239  		createdDate, err := time.Parse("Mon-Jan-2-2006-15-04-05", createdTime)
   240  		if err != nil {
   241  			log.Logger().Errorf("Error parsing date for cluster %s", createdTime)
   242  			log.Logger().Error(err)
   243  		} else {
   244  			ttlExceededDate := createdDate.Add(time.Duration(o.Duration) * time.Hour)
   245  			now := time.Now().UTC()
   246  			if now.After(ttlExceededDate) {
   247  				if _, ok := cluster.ResourceLabels["keep-me"]; !ok {
   248  					return true
   249  				}
   250  			}
   251  		}
   252  	}
   253  	return false
   254  }
   255  
   256  // ShouldDeleteDueToNewerRunEKS returns true if a cluster with a higher build number exists
   257  func (o *StepE2EGCOptions) ShouldDeleteDueToNewerRunEKS(cluster *cluster.Cluster, clusters []*cluster.Cluster) bool {
   258  	if branchLabel, ok := cluster.Labels["branch"]; ok {
   259  		if strings.Contains(branchLabel, "pr-") {
   260  			currentBuildNumber, err := o.GetBuildNumberFromClusterEKS(cluster)
   261  			if err == nil {
   262  				if clusterType, ok := cluster.Labels["cluster"]; ok {
   263  					for _, existingCluster := range clusters {
   264  						// Check for same PR & Cluster type
   265  						if existingClusterType, ok := existingCluster.Labels["cluster"]; ok {
   266  							if strings.Contains(existingCluster.Name, branchLabel) && existingClusterType == clusterType {
   267  								existingBuildNumber, err := o.GetBuildNumberFromClusterEKS(existingCluster)
   268  								if err == nil {
   269  									// Delete the older build
   270  									if currentBuildNumber < existingBuildNumber {
   271  										if _, ok := cluster.Labels["keep-me"]; !ok {
   272  											return true
   273  										}
   274  										break
   275  									}
   276  								}
   277  							}
   278  						}
   279  					}
   280  				}
   281  			}
   282  		}
   283  	}
   284  	return false
   285  }
   286  
   287  // ShouldDeleteDueToNewerRun returns true if a cluster with a higher build number exists
   288  func (o *StepE2EGCOptions) ShouldDeleteDueToNewerRun(cluster *gke.Cluster, clusters []gke.Cluster) bool {
   289  	if branchLabel, ok := cluster.ResourceLabels["branch"]; ok {
   290  		if strings.Contains(branchLabel, "pr-") {
   291  			currentBuildNumber, err := o.GetBuildNumberFromCluster(cluster)
   292  			if err == nil {
   293  				if clusterType, ok := cluster.ResourceLabels["cluster"]; ok {
   294  					for _, ec := range clusters {
   295  						existingCluster := ec
   296  						// Check for same PR & Cluster type
   297  						if existingClusterType, ok := existingCluster.ResourceLabels["cluster"]; ok {
   298  							if strings.Contains(existingCluster.Name, branchLabel) && existingClusterType == clusterType {
   299  								existingBuildNumber, err := o.GetBuildNumberFromCluster(&existingCluster)
   300  								if err == nil {
   301  									// Delete the older build
   302  									if currentBuildNumber < existingBuildNumber {
   303  										if _, ok := cluster.ResourceLabels["keep-me"]; !ok {
   304  											return true
   305  										}
   306  										break
   307  									}
   308  								}
   309  							}
   310  						}
   311  					}
   312  				}
   313  			}
   314  		}
   315  	}
   316  	return false
   317  }
   318  
   319  func (o *StepE2EGCOptions) deleteEksCluster(cluster *cluster.Cluster, client cluster.Client) error {
   320  	err := client.Delete(cluster)
   321  	if err != nil {
   322  		return errors.Wrapf(err, "error deleting EKS cluster %s", cluster.Name)
   323  	}
   324  	return nil
   325  }
   326  
   327  func (o *StepE2EGCOptions) deleteGkeCluster(cluster *gke.Cluster) {
   328  	deleteOptions := &deletecmd.DeleteGkeOptions{
   329  		Options: get.Options{
   330  			CommonOptions: &opts.CommonOptions{},
   331  		},
   332  	}
   333  	deleteOptions.Args = []string{cluster.Name}
   334  	deleteOptions.ProjectID = o.ProjectID
   335  	deleteOptions.Region = o.Region
   336  	err := deleteOptions.Run()
   337  	if err != nil {
   338  		log.Logger().Error(err)
   339  	} else {
   340  		log.Logger().Infof("Deleted cluster %s", cluster.Name)
   341  	}
   342  }