github.com/abayer/test-infra@v0.0.5/prow/cmd/sinker/main.go

/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
	"flag"
	"fmt"
	"time"

	"github.com/sirupsen/logrus"

	"k8s.io/test-infra/prow/config"
	"k8s.io/test-infra/prow/kube"
	"k8s.io/test-infra/prow/logrusutil"
	"k8s.io/test-infra/prow/pjutil"
)

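// kubeClient is the subset of kube.Client operations that sinker needs:
// listing and deleting pods and prowjobs. *kube.Client satisfies it.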
type kubeClient interface {
	ListPods(selector string) ([]kube.Pod, error)
	DeletePod(name string) error

	ListProwJobs(selector string) ([]kube.ProwJob, error)
	DeleteProwJob(name string) error
}

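// configAgent provides read access to the current Prow configuration.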
type configAgent interface {
	Config() *config.Config
}

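// options holds the command-line flag values.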
type options struct {
	runOnce       bool
	configPath    string
	jobConfigPath string
	buildCluster  string
}

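// gatherOptions parses command-line flags into options. Example invocation
// (the binary name is assumed from the package path; the flags are the ones
// defined below):
//
//	sinker --config-path=/etc/config/config.yaml --run-once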
func gatherOptions() options {
	o := options{}
	flag.BoolVar(&o.runOnce, "run-once", false, "If true, run only once then quit.")
	flag.StringVar(&o.configPath, "config-path", "/etc/config/config.yaml", "Path to config.yaml.")
	flag.StringVar(&o.jobConfigPath, "job-config-path", "", "Path to prow job configs.")
	flag.StringVar(&o.buildCluster, "build-cluster", "", "Path to kube.Cluster YAML file. If empty, uses the local cluster.")
	flag.Parse()
	return o
}

func main() {
	o := gatherOptions()
	logrus.SetFormatter(
		logrusutil.NewDefaultFieldsFormatter(nil, logrus.Fields{"component": "sinker"}),
	)

	configAgent := &config.Agent{}
	if err := configAgent.Start(o.configPath, o.jobConfigPath); err != nil {
		logrus.WithError(err).Fatal("Error starting config agent.")
	}

	kc, err := kube.NewClientInCluster(configAgent.Config().ProwJobNamespace)
	if err != nil {
		// Fatal rather than a plain return: returning from main here would
		// exit with status 0 and hide the startup failure.
		logrus.WithError(err).Fatal("Error getting client.")
	}

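	// Build one pod client per build cluster. Without --build-cluster we
	// talk only to the local cluster's pod namespace.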
	var pkcs map[string]*kube.Client
	if o.buildCluster == "" {
		pkcs = map[string]*kube.Client{
			kube.DefaultClusterAlias: kc.Namespace(configAgent.Config().PodNamespace),
		}
	} else {
		pkcs, err = kube.ClientMapFromFile(o.buildCluster, configAgent.Config().PodNamespace)
		if err != nil {
			logrus.WithError(err).Fatal("Error getting kube client(s).")
		}
	}

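	// Re-key the concrete clients as the kubeClient interface the
	// controller expects, which also makes them easy to fake in tests.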
	kubeClients := map[string]kubeClient{}
	for alias, client := range pkcs {
		kubeClients[alias] = kubeClient(client)
	}
	c := controller{
		logger:      logrus.NewEntry(logrus.StandardLogger()),
		kc:          kc,
		pkcs:        kubeClients,
		configAgent: configAgent,
	}

	// Clean now and regularly from now on.
	for {
		start := time.Now()
		c.clean()
		logrus.Infof("Sync time: %v", time.Since(start))
		if o.runOnce {
			break
		}
		time.Sleep(configAgent.Config().Sinker.ResyncPeriod)
	}
}

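// controller bundles the prowjob client, the per-cluster pod clients, and
// the config needed for one cleanup pass.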
type controller struct {
	logger      *logrus.Entry
	kc          kubeClient
	pkcs        map[string]kubeClient
	configAgent configAgent
}

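// clean deletes completed prowjobs older than MaxProwJobAge and their pods
// once older than MaxPodAge, always retaining the latest prowjob of each
// periodic that is still in the config.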
func (c *controller) clean() {
	// Clean up old prow jobs first.
	prowJobs, err := c.kc.ListProwJobs(kube.EmptySelector)
	if err != nil {
		c.logger.WithError(err).Error("Error listing prow jobs.")
		return
	}

	// Only delete a pod if its prowjob is marked as finished.
	isFinished := make(map[string]bool)

	maxProwJobAge := c.configAgent.Config().Sinker.MaxProwJobAge
	for _, prowJob := range prowJobs {
		// Handle periodics separately.
		if prowJob.Spec.Type == kube.PeriodicJob {
			continue
		}
		if !prowJob.Complete() {
			continue
		}
		isFinished[prowJob.ObjectMeta.Name] = true
		if time.Since(prowJob.Status.StartTime.Time) <= maxProwJobAge {
			continue
		}
		if err := c.kc.DeleteProwJob(prowJob.ObjectMeta.Name); err == nil {
			c.logger.WithFields(pjutil.ProwJobFields(&prowJob)).Info("Deleted prowjob.")
		} else {
			c.logger.WithFields(pjutil.ProwJobFields(&prowJob)).WithError(err).Error("Error deleting prowjob.")
		}
	}

	// Keep track of which periodic jobs are in the config so we do not
	// clean up their last prowjob.
	isActivePeriodic := make(map[string]bool)
	for _, p := range c.configAgent.Config().Periodics {
		isActivePeriodic[p.Name] = true
	}

	// Get the jobs that we need to retain so horologium can continue working
	// as intended.
	latestPeriodics := pjutil.GetLatestProwJobs(prowJobs, kube.PeriodicJob)
	for _, prowJob := range prowJobs {
		if prowJob.Spec.Type != kube.PeriodicJob {
			continue
		}

		latestPJ := latestPeriodics[prowJob.Spec.Job]
		if isActivePeriodic[prowJob.Spec.Job] && prowJob.ObjectMeta.Name == latestPJ.ObjectMeta.Name {
			// Skip deleting the latest run of an active periodic.
			continue
		}
		if !prowJob.Complete() {
			continue
		}
		isFinished[prowJob.ObjectMeta.Name] = true
		if time.Since(prowJob.Status.StartTime.Time) <= maxProwJobAge {
			continue
		}
		if err := c.kc.DeleteProwJob(prowJob.ObjectMeta.Name); err == nil {
			c.logger.WithFields(pjutil.ProwJobFields(&prowJob)).Info("Deleted prowjob.")
		} else {
			c.logger.WithFields(pjutil.ProwJobFields(&prowJob)).WithError(err).Error("Error deleting prowjob.")
		}
	}

	// Now clean up old pods.
	selector := fmt.Sprintf("%s = %s", kube.CreatedByProw, "true")
	for _, client := range c.pkcs {
		pods, err := client.ListPods(selector)
		if err != nil {
			c.logger.WithError(err).Error("Error listing pods.")
			return
		}
		maxPodAge := c.configAgent.Config().Sinker.MaxPodAge
		for _, pod := range pods {
			if _, ok := isFinished[pod.ObjectMeta.Name]; !ok {
				// The prowjob is not marked as completed yet; deleting the
				// pod now would only make plank create a brand new one.
				continue
			}
			if !pod.Status.StartTime.IsZero() && time.Since(pod.Status.StartTime.Time) > maxPodAge {
				// Delete old completed pods. Don't quit if we fail to delete one.
				if err := client.DeletePod(pod.ObjectMeta.Name); err == nil {
					c.logger.WithField("pod", pod.ObjectMeta.Name).Info("Deleted old completed pod.")
				} else {
					c.logger.WithField("pod", pod.ObjectMeta.Name).WithError(err).Error("Error deleting pod.")
				}
			}
		}
	}
}