github.com/castai/kvisor@v1.7.1-0.20240516114728-b3572a2607b5/cmd/controller/state/imagescan/scanner.go

package imagescan

import (
	"context"
	"encoding/hex"
	"errors"
	"fmt"
	"hash/fnv"
	"io"
	"strings"
	"time"

	"github.com/castai/kvisor/cmd/controller/kube"
	"github.com/samber/lo"
	batchv1 "k8s.io/api/batch/v1"
	corev1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/kubernetes"
	batchv1typed "k8s.io/client-go/kubernetes/typed/batch/v1"

	imagescanconfig "github.com/castai/kvisor/cmd/imagescan/config"
)

const (
	nonRootUserID = int64(65532)
)

var (
	errJobPodNotFound = errors.New("job pod not found")
)

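// imageScanner is the minimal contract consumed by callers that trigger
// image scans (e.g. so a fake scanner can be substituted in tests).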
type imageScanner interface {
	ScanImage(ctx context.Context, cfg ScanImageParams) (err error)
}

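// NewImageScanner returns a Scanner that runs image analysis as
// Kubernetes Jobs in podNamespace, polling job status every 5 seconds.
//
// Illustrative usage sketch (restCfg and cfg are assumed to be an
// already configured *rest.Config and Config; the names and values are
// hypothetical):
//
//	client, err := kubernetes.NewForConfig(restCfg)
//	if err != nil {
//		return err
//	}
//	scanner := NewImageScanner(client, cfg, "kvisor")
//	err = scanner.ScanImage(ctx, ScanImageParams{
//		ImageName:        "nginx:1.25",
//		ImageID:          "docker.io/library/nginx@sha256:<digest>",
//		ContainerRuntime: "containerd",
//		ResourceIDs:      []string{"<resource-uid>"},
//	})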
func NewImageScanner(client kubernetes.Interface, cfg Config, podNamespace string) *Scanner {
	return &Scanner{
		podLogProvider:   kube.NewPodLogReader(client),
		client:           client,
		jobCheckInterval: 5 * time.Second,
		cfg:              cfg,
		podNamespace:     podNamespace,
	}
}

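// Scanner creates one-off image scan Jobs in the cluster and tracks
// them to completion.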
type Scanner struct {
	podLogProvider   kube.PodLogProvider
	client           kubernetes.Interface
	cfg              Config
	jobCheckInterval time.Duration
	podNamespace     string
}

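// ScanImageParams describes a single image scan request. ImageID,
// ImageName, ContainerRuntime and ResourceIDs must be set; ScanImage
// validates them before submitting a job.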
type ScanImageParams struct {
	ImageName                   string // Example: ghcr.io/castai/kvisor/kvisor:8889dc92d6c69420a811de4fc67e619a30c028e9
	ImageID                     string // Example: ghcr.io/castai/kvisor/kvisor@sha256:2db087348c66274941013a3163036b1ca09da03ea64e9f9cdd79b8f647e4fe44
	ContainerRuntime            string
	Mode                        string
	NodeName                    string
	ResourceIDs                 []string
	DeleteFinishedJob           bool
	WaitForCompletion           bool
	WaitDurationAfterCompletion time.Duration
	Architecture                string
	Os                          string

	ScanImageDetails kube.ImageDetails
}

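// ScanImage builds and submits a scan Job for the given image. If a job
// with the same generated name already exists, it waits for that job
// instead of creating a duplicate. When DeleteFinishedJob is set, the
// job is deleted on return, after an optional
// WaitDurationAfterCompletion grace period.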
func (s *Scanner) ScanImage(ctx context.Context, params ScanImageParams) (rerr error) {
	if params.ImageID == "" {
		return errors.New("image ID is required")
	}
	if params.ImageName == "" {
		return errors.New("image name is required")
	}
	if params.ContainerRuntime == "" {
		return errors.New("container runtime is required")
	}
	if len(params.ResourceIDs) == 0 {
		return errors.New("resource ids are required")
	}
	if s.podNamespace == "" {
		return errors.New("pod namespace is required")
	}

	jobName := genJobName(params.ImageName)
	vols := volumesAndMounts{}
	mode := imagescanconfig.Mode(params.Mode)
	containerRuntime := params.ContainerRuntime

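	// The volume setup depends on the container runtime: docker daemon
	// mode mounts the docker socket, while containerd can either read
	// image content from the host filesystem or talk to its socket.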
	switch containerRuntime {
	case "docker":
		if mode == "" {
			mode = imagescanconfig.ModeDaemon
		}
		if mode == imagescanconfig.ModeDaemon {
			vols.volumes = append(vols.volumes, corev1.Volume{
				Name: "docker-sock",
				VolumeSource: corev1.VolumeSource{
					HostPath: &corev1.HostPathVolumeSource{
						Path: "/var/run/docker.sock",
						Type: lo.ToPtr(corev1.HostPathSocket),
					},
				},
			})
			vols.mounts = append(vols.mounts, corev1.VolumeMount{
				Name:      "docker-sock",
				ReadOnly:  true,
				MountPath: "/var/run/docker.sock",
			})
		}
	case "containerd":
		if mode == "" {
			mode = imagescanconfig.ModeHostFS
		}
		if mode == imagescanconfig.ModeHostFS {
			vols.volumes = append(vols.volumes, corev1.Volume{
				Name: "containerd-content",
				VolumeSource: corev1.VolumeSource{
					HostPath: &corev1.HostPathVolumeSource{
						Path: imagescanconfig.ContainerdContentDir,
						Type: lo.ToPtr(corev1.HostPathDirectory),
					},
				},
			})
			vols.mounts = append(vols.mounts, corev1.VolumeMount{
				Name:      "containerd-content",
				ReadOnly:  true,
				MountPath: imagescanconfig.ContainerdContentDir,
			})
		} else if mode == imagescanconfig.ModeDaemon {
			vols.volumes = append(vols.volumes, corev1.Volume{
				Name: "containerd-sock",
				VolumeSource: corev1.VolumeSource{
					HostPath: &corev1.HostPathVolumeSource{
						Path: "/run/containerd/containerd.sock",
						Type: lo.ToPtr(corev1.HostPathSocket),
					},
				},
			})
			vols.mounts = append(vols.mounts, corev1.VolumeMount{
				Name:      "containerd-sock",
				ReadOnly:  true,
				MountPath: "/run/containerd/containerd.sock",
			})
		}
		if s.cfg.PrivateRegistryPullSecret != "" {
			vols.volumes = append(vols.volumes, corev1.Volume{
				Name: "pull-secret",
				VolumeSource: corev1.VolumeSource{
					Secret: &corev1.SecretVolumeSource{
						SecretName: s.cfg.PrivateRegistryPullSecret,
					},
				},
			})
			vols.mounts = append(vols.mounts, corev1.VolumeMount{
				Name:      "pull-secret",
				ReadOnly:  true,
				MountPath: imagescanconfig.SecretMountPath,
			})
		}
	}

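	// Environment for the collector container of the scan job.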
	envVars := []corev1.EnvVar{
		{
			Name:  "GOMEMLIMIT",
			Value: "1800MiB",
		},
		{
			Name:  "COLLECTOR_IMAGE_ID",
			Value: params.ImageID,
		},
		{
			Name:  "COLLECTOR_IMAGE_NAME",
			Value: params.ImageName,
		},
		{
			Name:  "COLLECTOR_TIMEOUT",
			Value: "5m",
		},
		{
			Name:  "COLLECTOR_MODE",
			Value: string(mode),
		},
		{
			Name:  "COLLECTOR_RUNTIME",
			Value: containerRuntime,
		},
		{
			Name:  "COLLECTOR_RESOURCE_IDS",
			Value: strings.Join(params.ResourceIDs, ","),
		},
		{
			Name:  "COLLECTOR_IMAGE_ARCHITECTURE",
			Value: params.Architecture,
		},
		{
			Name:  "COLLECTOR_IMAGE_OS",
			Value: params.Os,
		},
		{
			Name:  "CASTAI_API_GRPC_ADDR",
			Value: s.cfg.CastaiGRPCAddress,
		},
		{
			Name:  "CASTAI_CLUSTER_ID",
			Value: s.cfg.CastaiClusterID,
		},
	}

	if s.cfg.CastaiGrpcInsecure {
		envVars = append(envVars, corev1.EnvVar{
			Name:  "CASTAI_GRPC_INSECURE",
			Value: "true",
		})
	}

	if s.cfg.PrivateRegistryPullSecret != "" {
		envVars = append(envVars, corev1.EnvVar{
			Name:  "COLLECTOR_PULL_SECRET",
			Value: s.cfg.PrivateRegistryPullSecret,
		})
	}

	if s.cfg.ImageScanBlobsCacheURL != "" {
		envVars = append(envVars, corev1.EnvVar{
			Name:  "COLLECTOR_BLOBS_CACHE_URL",
			Value: s.cfg.ImageScanBlobsCacheURL,
		})
	}

	podAnnotations := map[string]string{}
	if s.cfg.ProfileEnabled {
		if s.cfg.PhlareEnabled {
			podAnnotations["phlare.grafana.com/scrape"] = "true"
			podAnnotations["phlare.grafana.com/port"] = "6060"
		}
		envVars = append(envVars, corev1.EnvVar{
			Name:  "COLLECTOR_PPROF_ADDR",
			Value: ":6060",
		})
	}

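	// Tolerate CAST AI spot nodes so scan jobs can also run there.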
	tolerations := []corev1.Toleration{
		{
			Operator: corev1.TolerationOpExists,
			Key:      "scheduling.cast.ai/spot",
		},
	}

	jobSpec := scanJobSpec(
		s.cfg,
		params,
		s.podNamespace,
		jobName,
		envVars,
		podAnnotations,
		vols,
		tolerations,
	)
	jobs := s.client.BatchV1().Jobs(s.podNamespace)

	if params.DeleteFinishedJob {
		defer func() {
			// Optionally keep the finished job around for a while to
			// simplify troubleshooting.
			if params.WaitDurationAfterCompletion != 0 {
				select {
				case <-ctx.Done():
					rerr = ctx.Err()
					return
				case <-time.After(params.WaitDurationAfterCompletion):
				}
			}

			if err := jobs.Delete(ctx, jobSpec.Name, metav1.DeleteOptions{
				PropagationPolicy: lo.ToPtr(metav1.DeletePropagationBackground),
			}); err != nil && !apierrors.IsNotFound(err) {
				rerr = fmt.Errorf("deleting finished job: %w", err)
			}
		}()
	}

	// If the job already exists, wait for its completion and exit.
	_, err := jobs.Get(ctx, jobSpec.Name, metav1.GetOptions{})
	if err == nil {
		if err := s.waitForCompletion(ctx, jobs, jobName); err != nil {
			return fmt.Errorf("job already exists, wait for completion: %w", err)
		}
		return nil
	}

	// Create a new job and wait for completion.
	_, err = jobs.Create(ctx, jobSpec, metav1.CreateOptions{})
	if err != nil {
		return fmt.Errorf("creating job: %w", err)
	}

	if params.WaitForCompletion {
		if err := s.waitForCompletion(ctx, jobs, jobName); err != nil {
			ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
			defer cancel()
			jobPod, _ := s.getJobPod(ctx, jobName)
			if jobPod != nil {
				conds := getPodConditionsString(jobPod.Status.Conditions)
				return fmt.Errorf("wait for completion, pod_conditions=%s: %w", conds, err)
			}
			return fmt.Errorf("wait for completion: %w", err)
		}
	}
	return nil
}

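// getPodConditionsString formats pod conditions as a compact,
// human-readable string for error messages.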
func getPodConditionsString(conditions []corev1.PodCondition) string {
	var condStrings []string
	for _, condition := range conditions {
		reason := condition.Reason
		if reason == "" {
			reason = condition.Message
		}
		condStrings = append(condStrings, fmt.Sprintf("[type=%s, status=%s, reason=%s]", condition.Type, condition.Status, reason))
	}
	return strings.Join(condStrings, ", ")
}

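// waitForCompletion polls the job until it completes, fails, or the
// context is cancelled. A job that disappears is treated as done. For a
// failed job, the job pod's logs are included in the returned error.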
func (s *Scanner) waitForCompletion(ctx context.Context, jobs batchv1typed.JobInterface, jobName string) error {
	return wait.PollUntilContextCancel(ctx, s.jobCheckInterval, false, func(ctx context.Context) (done bool, err error) {
		job, err := jobs.Get(ctx, jobName, metav1.GetOptions{})
		if err != nil {
			if apierrors.IsNotFound(err) {
				return true, nil
			}
			return false, err
		}

		done = lo.ContainsBy(job.Status.Conditions, func(v batchv1.JobCondition) bool {
			return v.Status == corev1.ConditionTrue && v.Type == batchv1.JobComplete
		})
		if done {
			return true, nil
		}
		failed := lo.ContainsBy(job.Status.Conditions, func(v batchv1.JobCondition) bool {
			return v.Status == corev1.ConditionTrue && v.Type == batchv1.JobFailed
		})
		if failed {
			jobPod, err := s.getJobPod(ctx, jobName)
			if err != nil {
				return true, err
			}
			logsStream, err := s.podLogProvider.GetLogReader(ctx, s.podNamespace, jobPod.Name)
			if err != nil {
				return true, fmt.Errorf("creating logs stream for failed job: %w", err)
			}
			defer logsStream.Close()
			logs, err := io.ReadAll(logsStream)
			if err != nil {
				return true, fmt.Errorf("reading failed job logs: %w", err)
			}
			return true, fmt.Errorf("scan job failed: %s", string(logs))
		}
		return false, nil
	})
}

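// getJobPod returns the single pod owned by the named job, selected via
// the job-name label that the Job controller sets on its pods.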
func (s *Scanner) getJobPod(ctx context.Context, jobName string) (*corev1.Pod, error) {
	jobPods, err := s.client.CoreV1().Pods(s.podNamespace).List(ctx, metav1.ListOptions{LabelSelector: labels.Set{"job-name": jobName}.String()})
	if err != nil {
		return nil, err
	}
	if len(jobPods.Items) == 0 {
		return nil, errJobPodNotFound
	}
	if l := len(jobPods.Items); l != 1 {
		return nil, fmt.Errorf("expected to get one job pod, got %d", l)
	}
	return &jobPods.Items[0], nil
}

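// volumesAndMounts pairs pod volumes with their container mounts.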
type volumesAndMounts struct {
	volumes []corev1.Volume
	mounts  []corev1.VolumeMount
}

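// genJobName derives a deterministic job name by hashing the image name
// with FNV-128, so repeated scans of the same image map to the same Job
// object. For example, "nginx:latest" always produces the same
// "castai-imgscan-<32 hex chars>" name.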
func genJobName(imageName string) string {
	h := fnv.New128()
	h.Write([]byte(imageName))
	imgHash := hex.EncodeToString(h.Sum(nil))
	return fmt.Sprintf("castai-imgscan-%s", imgHash)
}

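// scanJobSpec assembles the batch/v1 Job that runs the collector binary
// as a non-root user with the provided env vars, volumes, annotations
// and tolerations.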
func scanJobSpec(
	cfg Config,
	params ScanImageParams,
	ns,
	jobName string,
	envVars []corev1.EnvVar,
	annotations map[string]string,
	vol volumesAndMounts,
	tolerations []corev1.Toleration,
) *batchv1.Job {
	podLabels := map[string]string{}
	if cfg.CloudProvider == "aks" {
		podLabels["azure.workload.identity/use"] = "true"
	}

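	// The job is disposable: no retries (BackoffLimit 0) and automatic
	// cleanup 100 seconds after it finishes (TTLSecondsAfterFinished).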
	job := &batchv1.Job{
		TypeMeta: metav1.TypeMeta{
			Kind:       "Job",
			APIVersion: "batch/v1",
		},
		ObjectMeta: metav1.ObjectMeta{
			Name:      jobName,
			Namespace: ns,
			Annotations: map[string]string{
				"autoscaling.cast.ai/disposable": "true",
			},
			Labels: map[string]string{
				"app.kubernetes.io/managed-by": "castai",
			},
		},
		Spec: batchv1.JobSpec{
			TTLSecondsAfterFinished: lo.ToPtr(int32(100)),
			BackoffLimit:            lo.ToPtr(int32(0)),
			Template: corev1.PodTemplateSpec{
				ObjectMeta: metav1.ObjectMeta{
					Annotations: annotations,
					Labels:      podLabels,
				},
				Spec: corev1.PodSpec{
					RestartPolicy:      corev1.RestartPolicyNever,
					ServiceAccountName: cfg.ServiceAccount,
					Affinity: &corev1.Affinity{
						NodeAffinity: &corev1.NodeAffinity{
							RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{
								NodeSelectorTerms: []corev1.NodeSelectorTerm{
									{
										MatchExpressions: []corev1.NodeSelectorRequirement{
											{
												Key:      "kubernetes.io/os",
												Operator: corev1.NodeSelectorOpIn,
												Values:   []string{"linux"},
											},
										},
									},
								},
							},
						},
					},
					Tolerations:                  tolerations,
					AutomountServiceAccountToken: lo.ToPtr(false),
					ImagePullSecrets:             params.ScanImageDetails.ImagePullSecrets,
					Containers: []corev1.Container{
						{
							SecurityContext: &corev1.SecurityContext{
								RunAsUser:                lo.ToPtr(nonRootUserID),
								RunAsNonRoot:             lo.ToPtr(true),
								AllowPrivilegeEscalation: lo.ToPtr(false),
							},
							Name:  "collector",
							Image: params.ScanImageDetails.ScannerImageName,
							Command: []string{
								"/usr/local/bin/kvisor-image-scanner",
							},
							Args: []string{
								"scan",
							},
							ImagePullPolicy: corev1.PullPolicy(cfg.ScanJobImagePullPolicy),
							Env:             envVars,
							EnvFrom: []corev1.EnvFromSource{
								{
									SecretRef: &corev1.SecretEnvSource{
										LocalObjectReference: corev1.LocalObjectReference{
											Name: cfg.CastaiSecretRefName,
										},
									},
								},
							},
							VolumeMounts: vol.mounts,
							Resources:    corev1.ResourceRequirements{},
						},
					},
					Volumes: vol.volumes,
				},
			},
		},
	}

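	// Prefer, but do not require, scheduling on the node that runs the
	// scanned image, so host filesystem and daemon modes can read image
	// data locally.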
	if params.NodeName != "" {
		job.Spec.Template.Spec.Affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution = []corev1.PreferredSchedulingTerm{
			{
				Weight: 1,
				Preference: corev1.NodeSelectorTerm{
					MatchExpressions: []corev1.NodeSelectorRequirement{
						{
							Key:      "kubernetes.io/hostname",
							Operator: corev1.NodeSelectorOpIn,
							Values:   []string{params.NodeName},
						},
					},
				},
			},
		}
	}

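	// Optional resource requests and limits. resource.MustParse panics
	// on malformed quantities, so configured values must be valid.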
	if cfg.CPULimit != "" {
		cpuLimit := resource.MustParse(cfg.CPULimit)
		if job.Spec.Template.Spec.Containers[0].Resources.Limits == nil {
			job.Spec.Template.Spec.Containers[0].Resources.Limits = map[corev1.ResourceName]resource.Quantity{}
		}
		job.Spec.Template.Spec.Containers[0].Resources.Limits[corev1.ResourceCPU] = cpuLimit
	}

	if cfg.CPURequest != "" {
		cpuRequest := resource.MustParse(cfg.CPURequest)
		if job.Spec.Template.Spec.Containers[0].Resources.Requests == nil {
			job.Spec.Template.Spec.Containers[0].Resources.Requests = map[corev1.ResourceName]resource.Quantity{}
		}
		job.Spec.Template.Spec.Containers[0].Resources.Requests[corev1.ResourceCPU] = cpuRequest
	}

	if cfg.MemoryRequest != "" {
		memRequest := resource.MustParse(cfg.MemoryRequest)
		if job.Spec.Template.Spec.Containers[0].Resources.Requests == nil {
			job.Spec.Template.Spec.Containers[0].Resources.Requests = map[corev1.ResourceName]resource.Quantity{}
		}
		job.Spec.Template.Spec.Containers[0].Resources.Requests[corev1.ResourceMemory] = memRequest
	}

	if cfg.MemoryLimit != "" {
		memLimit := resource.MustParse(cfg.MemoryLimit)
		if job.Spec.Template.Spec.Containers[0].Resources.Limits == nil {
			job.Spec.Template.Spec.Containers[0].Resources.Limits = map[corev1.ResourceName]resource.Quantity{}
		}
		job.Spec.Template.Spec.Containers[0].Resources.Limits[corev1.ResourceMemory] = memLimit
	}
	return job
}