github.com/jenkins-x/jx/v2@v2.1.155/pkg/cmd/step/e2e/step_e2e_gc.go (about) 1 package e2e 2 3 import ( 4 "fmt" 5 "os" 6 "strconv" 7 "strings" 8 "time" 9 10 "github.com/jenkins-x/jx/v2/pkg/cluster" 11 12 "github.com/pkg/errors" 13 14 "github.com/jenkins-x/jx/v2/pkg/cluster/factory" 15 16 "github.com/jenkins-x/jx-logging/pkg/log" 17 "github.com/jenkins-x/jx/v2/pkg/cloud" 18 "github.com/jenkins-x/jx/v2/pkg/cloud/gke" 19 "github.com/jenkins-x/jx/v2/pkg/cmd/deletecmd" 20 "github.com/jenkins-x/jx/v2/pkg/cmd/gc" 21 "github.com/jenkins-x/jx/v2/pkg/cmd/get" 22 "github.com/jenkins-x/jx/v2/pkg/cmd/helper" 23 "github.com/jenkins-x/jx/v2/pkg/cmd/opts" 24 "github.com/jenkins-x/jx/v2/pkg/cmd/opts/step" 25 "github.com/jenkins-x/jx/v2/pkg/cmd/templates" 26 "github.com/spf13/cobra" 27 ) 28 29 // StepE2EGCOptions contains the command line flags 30 type StepE2EGCOptions struct { 31 step.StepOptions 32 ProjectID string 33 Providers []string 34 Region string 35 Duration int 36 } 37 38 var ( 39 stepE2EGCLong = templates.LongDesc(` 40 This pipeline step removes stale E2E test clusters 41 `) 42 43 stepE2EGCExample = templates.Examples(` 44 # delete stale E2E test clusters 45 jx step e2e gc 46 47 `) 48 ) 49 50 // NewCmdStepE2EGC creates the CLI command 51 func NewCmdStepE2EGC(commonOpts *opts.CommonOptions) *cobra.Command { 52 options := StepE2EGCOptions{ 53 StepOptions: step.StepOptions{ 54 CommonOptions: commonOpts, 55 }, 56 } 57 cmd := &cobra.Command{ 58 Use: "gc", 59 Short: "Removes unused e2e clusters", 60 Aliases: []string{}, 61 Long: stepE2EGCLong, 62 Example: stepE2EGCExample, 63 Run: func(cmd *cobra.Command, args []string) { 64 options.Cmd = cmd 65 options.Args = args 66 err := options.Run() 67 helper.CheckErr(err) 68 }, 69 } 70 cmd.Flags().StringVarP(&options.Region, "region", "", "europe-west1-c", "GKE region to use. Default: europe-west1-c") 71 cmd.Flags().StringVarP(&options.ProjectID, "project-id", "p", "", "Google Project ID to delete cluster from") 72 cmd.Flags().IntVarP(&options.Duration, "duration", "d", 2, "How many hours old a cluster should be before it is deleted if it does not have a --delete tag") 73 cmd.Flags().StringArrayVarP(&options.Providers, "providers", "", []string{"gke"}, "The providers to run the cleanup for") 74 75 return cmd 76 } 77 78 // Run runs the command 79 func (o *StepE2EGCOptions) Run() error { 80 // Until https://github.com/jenkins-x/jx/issues/6206 is done, we are going to be using different approaches to run this for different providers 81 for _, pr := range o.Providers { 82 switch strings.ToLower(pr) { 83 case cloud.GKE: 84 return o.gcpGarbageCollection() 85 case cloud.AWS: 86 fallthrough 87 case cloud.EKS: 88 return o.eksGarbageCollection() 89 default: 90 return fmt.Errorf("provider %s doesn't have an E2E GC implementation defined", pr) 91 } 92 } 93 return nil 94 } 95 96 func (o *StepE2EGCOptions) eksGarbageCollection() error { 97 eksClient, err := factory.NewClientForProvider(cloud.EKS) 98 if err != nil { 99 return errors.Wrap(err, "could not obtain an EKS cluster client to ") 100 } 101 eksClusters, err := eksClient.List() 102 if err != nil { 103 return errors.Wrap(err, "there was a problem obtaining every eksClient in the current account") 104 } 105 106 for _, eksCluster := range eksClusters { 107 if eksCluster.Status == "ACTIVE" { 108 if !o.ShouldDeleteMarkedEKSCluster(eksCluster) { 109 if !o.ShouldDeleteOlderThanDurationEKS(eksCluster) { 110 if o.ShouldDeleteDueToNewerRunEKS(eksCluster, eksClusters) { 111 err = o.deleteEksCluster(eksCluster, eksClient) 112 } 113 } else { 114 err = o.deleteEksCluster(eksCluster, eksClient) 115 } 116 } else { 117 err = o.deleteEksCluster(eksCluster, eksClient) 118 } 119 } 120 if err != nil { 121 log.Logger().Errorf("error deleting cluster %s: %s", eksCluster.Name, err.Error()) 122 } 123 } 124 return nil 125 } 126 127 func (o *StepE2EGCOptions) gcpGarbageCollection() error { 128 err := o.InstallRequirements(cloud.GKE) 129 if err != nil { 130 return err 131 } 132 gkeSa := os.Getenv("GKE_SA_KEY_FILE") 133 if gkeSa != "" { 134 err = o.GCloud().Login(gkeSa, true) 135 if err != nil { 136 return err 137 } 138 } 139 140 clusters, err := o.GCloud().ListClusters(o.Region, o.ProjectID) 141 if err != nil { 142 return err 143 } 144 145 for _, cluster := range clusters { 146 c := cluster 147 if c.Status == "RUNNING" { 148 // Marked for deletion 149 if !o.ShouldDeleteMarkedCluster(&c) { 150 // Older than duration in hours 151 if !o.ShouldDeleteOlderThanDuration(&c) { 152 // Delete build that has been replaced by a newer version 153 if o.ShouldDeleteDueToNewerRun(&c, clusters) { 154 o.deleteGkeCluster(&c) 155 } 156 } else { 157 o.deleteGkeCluster(&c) 158 } 159 } else { 160 o.deleteGkeCluster(&c) 161 } 162 } 163 } 164 gkeGCOpts := gc.GCGKEOptions{ 165 CommonOptions: &opts.CommonOptions{}, 166 } 167 gkeGCOpts.Err = o.Err 168 gkeGCOpts.Out = o.Out 169 gkeGCOpts.Flags.ProjectID = o.ProjectID 170 gkeGCOpts.Flags.RunNow = true 171 return gkeGCOpts.Run() 172 } 173 174 // GetBuildNumberFromClusterEKS gets the build number from the cluster labels 175 func (o *StepE2EGCOptions) GetBuildNumberFromClusterEKS(cluster *cluster.Cluster) (int, error) { 176 if branch, ok := cluster.Labels["branch"]; ok { 177 if clusterType, ok := cluster.Labels["cluster"]; ok { 178 buildNumStr := strings.Replace(strings.Replace(cluster.Name, branch+"-", "", -1), "-"+clusterType, "", -1) 179 return strconv.Atoi(buildNumStr) 180 } 181 } 182 return 0, fmt.Errorf("finding build number for cluster " + cluster.Name) 183 } 184 185 // GetBuildNumberFromCluster gets the build number from the cluster labels 186 func (o *StepE2EGCOptions) GetBuildNumberFromCluster(cluster *gke.Cluster) (int, error) { 187 if branch, ok := cluster.ResourceLabels["branch"]; ok { 188 if clusterType, ok := cluster.ResourceLabels["cluster"]; ok { 189 buildNumStr := strings.Replace(strings.Replace(cluster.Name, branch+"-", "", -1), "-"+clusterType, "", -1) 190 return strconv.Atoi(buildNumStr) 191 } 192 } 193 return 0, fmt.Errorf("finding build number for cluster " + cluster.Name) 194 } 195 196 // ShouldDeleteMarkedCluster returns true if the cluster has a delete label 197 func (o *StepE2EGCOptions) ShouldDeleteMarkedCluster(cluster *gke.Cluster) bool { 198 if deleteLabel, ok := cluster.ResourceLabels["delete-me"]; ok { 199 if deleteLabel == "true" { 200 return true 201 } 202 } 203 return false 204 } 205 206 // ShouldDeleteMarkedEKSCluster returns true if the cluster has a delete label 207 func (o *StepE2EGCOptions) ShouldDeleteMarkedEKSCluster(cluster *cluster.Cluster) bool { 208 if deleteLabel, ok := cluster.Labels["delete-me"]; ok { 209 if deleteLabel == "true" { 210 return true 211 } 212 } 213 return false 214 } 215 216 // ShouldDeleteOlderThanDurationEKS returns true if the cluster is older than the delete duration and does not have a keep label 217 func (o *StepE2EGCOptions) ShouldDeleteOlderThanDurationEKS(cluster *cluster.Cluster) bool { 218 if createdTime, ok := cluster.Labels["create-time"]; ok { 219 createdDate, err := time.Parse("Mon-Jan-2-2006-15-04-05", createdTime) 220 if err != nil { 221 log.Logger().Errorf("Error parsing date for cluster %s", createdTime) 222 log.Logger().Error(err) 223 } else { 224 ttlExceededDate := createdDate.Add(time.Duration(o.Duration) * time.Hour) 225 now := time.Now().UTC() 226 if now.After(ttlExceededDate) { 227 if _, ok := cluster.Labels["keep-me"]; !ok { 228 return true 229 } 230 } 231 } 232 } 233 return false 234 } 235 236 // ShouldDeleteOlderThanDuration returns true if the cluster is older than the delete duration and does not have a keep label 237 func (o *StepE2EGCOptions) ShouldDeleteOlderThanDuration(cluster *gke.Cluster) bool { 238 if createdTime, ok := cluster.ResourceLabels["create-time"]; ok { 239 createdDate, err := time.Parse("Mon-Jan-2-2006-15-04-05", createdTime) 240 if err != nil { 241 log.Logger().Errorf("Error parsing date for cluster %s", createdTime) 242 log.Logger().Error(err) 243 } else { 244 ttlExceededDate := createdDate.Add(time.Duration(o.Duration) * time.Hour) 245 now := time.Now().UTC() 246 if now.After(ttlExceededDate) { 247 if _, ok := cluster.ResourceLabels["keep-me"]; !ok { 248 return true 249 } 250 } 251 } 252 } 253 return false 254 } 255 256 // ShouldDeleteDueToNewerRunEKS returns true if a cluster with a higher build number exists 257 func (o *StepE2EGCOptions) ShouldDeleteDueToNewerRunEKS(cluster *cluster.Cluster, clusters []*cluster.Cluster) bool { 258 if branchLabel, ok := cluster.Labels["branch"]; ok { 259 if strings.Contains(branchLabel, "pr-") { 260 currentBuildNumber, err := o.GetBuildNumberFromClusterEKS(cluster) 261 if err == nil { 262 if clusterType, ok := cluster.Labels["cluster"]; ok { 263 for _, existingCluster := range clusters { 264 // Check for same PR & Cluster type 265 if existingClusterType, ok := existingCluster.Labels["cluster"]; ok { 266 if strings.Contains(existingCluster.Name, branchLabel) && existingClusterType == clusterType { 267 existingBuildNumber, err := o.GetBuildNumberFromClusterEKS(existingCluster) 268 if err == nil { 269 // Delete the older build 270 if currentBuildNumber < existingBuildNumber { 271 if _, ok := cluster.Labels["keep-me"]; !ok { 272 return true 273 } 274 break 275 } 276 } 277 } 278 } 279 } 280 } 281 } 282 } 283 } 284 return false 285 } 286 287 // ShouldDeleteDueToNewerRun returns true if a cluster with a higher build number exists 288 func (o *StepE2EGCOptions) ShouldDeleteDueToNewerRun(cluster *gke.Cluster, clusters []gke.Cluster) bool { 289 if branchLabel, ok := cluster.ResourceLabels["branch"]; ok { 290 if strings.Contains(branchLabel, "pr-") { 291 currentBuildNumber, err := o.GetBuildNumberFromCluster(cluster) 292 if err == nil { 293 if clusterType, ok := cluster.ResourceLabels["cluster"]; ok { 294 for _, ec := range clusters { 295 existingCluster := ec 296 // Check for same PR & Cluster type 297 if existingClusterType, ok := existingCluster.ResourceLabels["cluster"]; ok { 298 if strings.Contains(existingCluster.Name, branchLabel) && existingClusterType == clusterType { 299 existingBuildNumber, err := o.GetBuildNumberFromCluster(&existingCluster) 300 if err == nil { 301 // Delete the older build 302 if currentBuildNumber < existingBuildNumber { 303 if _, ok := cluster.ResourceLabels["keep-me"]; !ok { 304 return true 305 } 306 break 307 } 308 } 309 } 310 } 311 } 312 } 313 } 314 } 315 } 316 return false 317 } 318 319 func (o *StepE2EGCOptions) deleteEksCluster(cluster *cluster.Cluster, client cluster.Client) error { 320 err := client.Delete(cluster) 321 if err != nil { 322 return errors.Wrapf(err, "error deleting EKS cluster %s", cluster.Name) 323 } 324 return nil 325 } 326 327 func (o *StepE2EGCOptions) deleteGkeCluster(cluster *gke.Cluster) { 328 deleteOptions := &deletecmd.DeleteGkeOptions{ 329 Options: get.Options{ 330 CommonOptions: &opts.CommonOptions{}, 331 }, 332 } 333 deleteOptions.Args = []string{cluster.Name} 334 deleteOptions.ProjectID = o.ProjectID 335 deleteOptions.Region = o.Region 336 err := deleteOptions.Run() 337 if err != nil { 338 log.Logger().Error(err) 339 } else { 340 log.Logger().Infof("Deleted cluster %s", cluster.Name) 341 } 342 }