github.com/Racer159/jackal@v0.32.7-0.20240401174413-0bd2339e4f2e/src/pkg/cluster/injector.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // SPDX-FileCopyrightText: 2021-Present The Jackal Authors
     3  
     4  // Package cluster contains Jackal-specific cluster management functions.
     5  package cluster
     6  
     7  import (
     8  	"fmt"
     9  	"net/http"
    10  	"os"
    11  	"path/filepath"
    12  	"regexp"
    13  	"time"
    14  
    15  	"github.com/Racer159/jackal/src/config"
    16  	"github.com/Racer159/jackal/src/pkg/k8s"
    17  	"github.com/Racer159/jackal/src/pkg/layout"
    18  	"github.com/Racer159/jackal/src/pkg/message"
    19  	"github.com/Racer159/jackal/src/pkg/transform"
    20  	"github.com/Racer159/jackal/src/pkg/utils"
    21  	"github.com/defenseunicorns/pkg/helpers"
    22  	"github.com/google/go-containerregistry/pkg/crane"
    23  	"github.com/mholt/archiver/v3"
    24  	corev1 "k8s.io/api/core/v1"
    25  	"k8s.io/apimachinery/pkg/api/resource"
    26  	"k8s.io/apimachinery/pkg/util/intstr"
    27  )
    28  
    29  // The chunk size for the tarball chunks.
    30  var payloadChunkSize = 1024 * 768
    31  
    32  var (
    33  	injectorRequestedCPU    = resource.MustParse(".5")
    34  	injectorRequestedMemory = resource.MustParse("64Mi")
    35  	injectorLimitCPU        = resource.MustParse("1")
    36  	injectorLimitMemory     = resource.MustParse("256Mi")
    37  )
    38  
    39  // imageNodeMap is a map of image/node pairs.
    40  type imageNodeMap map[string][]string
    41  
    42  // StartInjectionMadness initializes a Jackal injection into the cluster.
    43  func (c *Cluster) StartInjectionMadness(tmpDir string, imagesDir string, injectorSeedSrcs []string) {
    44  	spinner := message.NewProgressSpinner("Attempting to bootstrap the seed image into the cluster")
    45  	defer spinner.Stop()
    46  
    47  	tmp := layout.InjectionMadnessPaths{
    48  		SeedImagesDir: filepath.Join(tmpDir, "seed-images"),
    49  		// should already exist
    50  		InjectionBinary: filepath.Join(tmpDir, "jackal-injector"),
    51  		// gets created here
    52  		InjectorPayloadTarGz: filepath.Join(tmpDir, "payload.tar.gz"),
    53  	}
    54  
    55  	if err := helpers.CreateDirectory(tmp.SeedImagesDir, helpers.ReadWriteExecuteUser); err != nil {
    56  		spinner.Fatalf(err, "Unable to create the seed images directory")
    57  	}
    58  
    59  	var err error
    60  	var images imageNodeMap
    61  	var payloadConfigmaps []string
    62  	var sha256sum string
    63  	var seedImages []transform.Image
    64  
    65  	// Get all the images from the cluster
    66  	timeout := 5 * time.Minute
    67  	spinner.Updatef("Getting the list of existing cluster images (%s timeout)", timeout.String())
    68  	if images, err = c.getImagesAndNodesForInjection(timeout); err != nil {
    69  		spinner.Fatalf(err, "Unable to generate a list of candidate images to perform the registry injection")
    70  	}
    71  
    72  	spinner.Updatef("Creating the injector configmap")
    73  	if err = c.createInjectorConfigmap(tmp.InjectionBinary); err != nil {
    74  		spinner.Fatalf(err, "Unable to create the injector configmap")
    75  	}
    76  
    77  	spinner.Updatef("Creating the injector service")
    78  	if service, err := c.createService(); err != nil {
    79  		spinner.Fatalf(err, "Unable to create the injector service")
    80  	} else {
    81  		config.JackalSeedPort = fmt.Sprintf("%d", service.Spec.Ports[0].NodePort)
    82  	}
    83  
    84  	spinner.Updatef("Loading the seed image from the package")
    85  	if seedImages, err = c.loadSeedImages(imagesDir, tmp.SeedImagesDir, injectorSeedSrcs, spinner); err != nil {
    86  		spinner.Fatalf(err, "Unable to load the injector seed image from the package")
    87  	}
    88  
    89  	spinner.Updatef("Loading the seed registry configmaps")
    90  	if payloadConfigmaps, sha256sum, err = c.createPayloadConfigmaps(tmp.SeedImagesDir, tmp.InjectorPayloadTarGz, spinner); err != nil {
    91  		spinner.Fatalf(err, "Unable to generate the injector payload configmaps")
    92  	}
    93  
    94  	// https://regex101.com/r/eLS3at/1
    95  	jackalImageRegex := regexp.MustCompile(`(?m)^127\.0\.0\.1:`)
    96  
    97  	// Try to create an injector pod using an existing image in the cluster
    98  	for image, node := range images {
    99  		// Don't try to run against the seed image if this is a secondary jackal init run
   100  		if jackalImageRegex.MatchString(image) {
   101  			continue
   102  		}
   103  
   104  		spinner.Updatef("Attempting to bootstrap with the %s/%s", node, image)
   105  
   106  		// Make sure the pod is not there first
   107  		_ = c.DeletePod(JackalNamespaceName, "injector")
   108  
   109  		// Update the podspec image path and use the first node found
   110  		pod, err := c.buildInjectionPod(node[0], image, payloadConfigmaps, sha256sum)
   111  		if err != nil {
   112  			// Just debug log the output because failures just result in trying the next image
   113  			message.Debug(err)
   114  			continue
   115  		}
   116  
   117  		// Create the pod in the cluster
   118  		pod, err = c.CreatePod(pod)
   119  		if err != nil {
   120  			// Just debug log the output because failures just result in trying the next image
   121  			message.Debug(pod, err)
   122  			continue
   123  		}
   124  
   125  		// if no error, try and wait for a seed image to be present, return if successful
   126  		if c.injectorIsReady(seedImages, spinner) {
   127  			spinner.Success()
   128  			return
   129  		}
   130  
   131  		// Otherwise just continue to try next image
   132  	}
   133  
   134  	// All images were exhausted and still no happiness
   135  	spinner.Fatalf(nil, "Unable to perform the injection")
   136  }
   137  
   138  // StopInjectionMadness handles cleanup once the seed registry is up.
   139  func (c *Cluster) StopInjectionMadness() error {
   140  	// Try to kill the injector pod now
   141  	if err := c.DeletePod(JackalNamespaceName, "injector"); err != nil {
   142  		return err
   143  	}
   144  
   145  	// Remove the configmaps
   146  	labelMatch := map[string]string{"jackal-injector": "payload"}
   147  	if err := c.DeleteConfigMapsByLabel(JackalNamespaceName, labelMatch); err != nil {
   148  		return err
   149  	}
   150  
   151  	// Remove the injector service
   152  	return c.DeleteService(JackalNamespaceName, "jackal-injector")
   153  }
   154  
   155  func (c *Cluster) loadSeedImages(imagesDir, seedImagesDir string, injectorSeedSrcs []string, spinner *message.Spinner) ([]transform.Image, error) {
   156  	seedImages := []transform.Image{}
   157  	localReferenceToDigest := make(map[string]string)
   158  
   159  	// Load the injector-specific images and save them as seed-images
   160  	for _, src := range injectorSeedSrcs {
   161  		spinner.Updatef("Loading the seed image '%s' from the package", src)
   162  		ref, err := transform.ParseImageRef(src)
   163  		if err != nil {
   164  			return seedImages, fmt.Errorf("failed to create ref for image %s: %w", src, err)
   165  		}
   166  		img, err := utils.LoadOCIImage(imagesDir, ref)
   167  		if err != nil {
   168  			return seedImages, err
   169  		}
   170  
   171  		crane.SaveOCI(img, seedImagesDir)
   172  
   173  		seedImages = append(seedImages, ref)
   174  
   175  		// Get the image digest so we can set an annotation in the image.json later
   176  		imgDigest, err := img.Digest()
   177  		if err != nil {
   178  			return seedImages, err
   179  		}
   180  		// This is done _without_ the domain (different from pull.go) since the injector only handles local images
   181  		localReferenceToDigest[ref.Path+ref.TagOrDigest] = imgDigest.String()
   182  	}
   183  
   184  	if err := utils.AddImageNameAnnotation(seedImagesDir, localReferenceToDigest); err != nil {
   185  		return seedImages, fmt.Errorf("unable to format OCI layout: %w", err)
   186  	}
   187  
   188  	return seedImages, nil
   189  }
   190  
   191  func (c *Cluster) createPayloadConfigmaps(seedImagesDir, tarPath string, spinner *message.Spinner) ([]string, string, error) {
   192  	var configMaps []string
   193  
   194  	// Chunk size has to accommodate base64 encoding & etcd 1MB limit
   195  	tarFileList, err := filepath.Glob(filepath.Join(seedImagesDir, "*"))
   196  	if err != nil {
   197  		return configMaps, "", err
   198  	}
   199  
   200  	spinner.Updatef("Creating the seed registry archive to send to the cluster")
   201  	// Create a tar archive of the injector payload
   202  	if err := archiver.Archive(tarFileList, tarPath); err != nil {
   203  		return configMaps, "", err
   204  	}
   205  
   206  	chunks, sha256sum, err := helpers.ReadFileByChunks(tarPath, payloadChunkSize)
   207  	if err != nil {
   208  		return configMaps, "", err
   209  	}
   210  
   211  	spinner.Updatef("Splitting the archive into binary configmaps")
   212  
   213  	chunkCount := len(chunks)
   214  
   215  	// Loop over all chunks and generate configmaps
   216  	for idx, data := range chunks {
   217  		// Create a cat-friendly filename
   218  		fileName := fmt.Sprintf("jackal-payload-%03d", idx)
   219  
   220  		// Store the binary data
   221  		configData := map[string][]byte{
   222  			fileName: data,
   223  		}
   224  
   225  		spinner.Updatef("Adding archive binary configmap %d of %d to the cluster", idx+1, chunkCount)
   226  
   227  		// Attempt to create the configmap in the cluster
   228  		if _, err = c.ReplaceConfigmap(JackalNamespaceName, fileName, configData); err != nil {
   229  			return configMaps, "", err
   230  		}
   231  
   232  		// Add the configmap to the configmaps slice for later usage in the pod
   233  		configMaps = append(configMaps, fileName)
   234  
   235  		// Give the control plane a 250ms buffer between each configmap
   236  		time.Sleep(250 * time.Millisecond)
   237  	}
   238  
   239  	return configMaps, sha256sum, nil
   240  }
   241  
   242  // Test for pod readiness and seed image presence.
   243  func (c *Cluster) injectorIsReady(seedImages []transform.Image, spinner *message.Spinner) bool {
   244  	tunnel, err := c.NewTunnel(JackalNamespaceName, k8s.SvcResource, JackalInjectorName, "", 0, JackalInjectorPort)
   245  	if err != nil {
   246  		return false
   247  	}
   248  
   249  	_, err = tunnel.Connect()
   250  	if err != nil {
   251  		return false
   252  	}
   253  	defer tunnel.Close()
   254  
   255  	spinner.Updatef("Testing the injector for seed image availability")
   256  
   257  	for _, seedImage := range seedImages {
   258  		seedRegistry := fmt.Sprintf("%s/v2/%s/manifests/%s", tunnel.HTTPEndpoint(), seedImage.Path, seedImage.Tag)
   259  
   260  		var resp *http.Response
   261  		var err error
   262  		err = tunnel.Wrap(func() error {
   263  			resp, err = http.Get(seedRegistry)
   264  			return err
   265  		})
   266  
   267  		if err != nil || resp.StatusCode != 200 {
   268  			// Just debug log the output because failures just result in trying the next image
   269  			message.Debug(resp, err)
   270  			return false
   271  		}
   272  	}
   273  
   274  	spinner.Updatef("Seed image found, injector is ready")
   275  	return true
   276  }
   277  
   278  func (c *Cluster) createInjectorConfigmap(binaryPath string) error {
   279  	var err error
   280  	configData := make(map[string][]byte)
   281  
   282  	// Add the injector binary data to the configmap
   283  	if configData["jackal-injector"], err = os.ReadFile(binaryPath); err != nil {
   284  		return err
   285  	}
   286  
   287  	// Try to delete configmap silently
   288  	_ = c.DeleteConfigmap(JackalNamespaceName, "rust-binary")
   289  
   290  	// Attempt to create the configmap in the cluster
   291  	if _, err = c.CreateConfigmap(JackalNamespaceName, "rust-binary", configData); err != nil {
   292  		return err
   293  	}
   294  
   295  	return nil
   296  }
   297  
   298  func (c *Cluster) createService() (*corev1.Service, error) {
   299  	service := c.GenerateService(JackalNamespaceName, "jackal-injector")
   300  
   301  	service.Spec.Type = corev1.ServiceTypeNodePort
   302  	service.Spec.Ports = append(service.Spec.Ports, corev1.ServicePort{
   303  		Port: int32(5000),
   304  	})
   305  	service.Spec.Selector = map[string]string{
   306  		"app": "jackal-injector",
   307  	}
   308  
   309  	// Attempt to purse the service silently
   310  	_ = c.DeleteService(JackalNamespaceName, "jackal-injector")
   311  
   312  	return c.CreateService(service)
   313  }
   314  
   315  // buildInjectionPod return a pod for injection with the appropriate containers to perform the injection.
   316  func (c *Cluster) buildInjectionPod(node, image string, payloadConfigmaps []string, payloadShasum string) (*corev1.Pod, error) {
   317  	pod := c.GeneratePod("injector", JackalNamespaceName)
   318  	executeMode := int32(0777)
   319  
   320  	pod.Labels["app"] = "jackal-injector"
   321  
   322  	// Ensure jackal agent doesn't break the injector on future runs
   323  	pod.Labels[agentLabel] = "ignore"
   324  
   325  	// Bind the pod to the node the image was found on
   326  	pod.Spec.NodeName = node
   327  
   328  	// Do not try to restart the pod as it will be deleted/re-created instead
   329  	pod.Spec.RestartPolicy = corev1.RestartPolicyNever
   330  
   331  	pod.Spec.Containers = []corev1.Container{
   332  		{
   333  			Name: "injector",
   334  
   335  			// An existing image already present on the cluster
   336  			Image: image,
   337  
   338  			// PullIfNotPresent because some distros provide a way (even in airgap) to pull images from local or direct-connected registries
   339  			ImagePullPolicy: corev1.PullIfNotPresent,
   340  
   341  			// This directory's contents come from the init container output
   342  			WorkingDir: "/jackal-init",
   343  
   344  			// Call the injector with shasum of the tarball
   345  			Command: []string{"/jackal-init/jackal-injector", payloadShasum},
   346  
   347  			// Shared mount between the init and regular containers
   348  			VolumeMounts: []corev1.VolumeMount{
   349  				{
   350  					Name:      "init",
   351  					MountPath: "/jackal-init/jackal-injector",
   352  					SubPath:   "jackal-injector",
   353  				},
   354  				{
   355  					Name:      "seed",
   356  					MountPath: "/jackal-seed",
   357  				},
   358  			},
   359  
   360  			// Readiness probe to optimize the pod startup time
   361  			ReadinessProbe: &corev1.Probe{
   362  				PeriodSeconds:    2,
   363  				SuccessThreshold: 1,
   364  				FailureThreshold: 10,
   365  				ProbeHandler: corev1.ProbeHandler{
   366  					HTTPGet: &corev1.HTTPGetAction{
   367  						Path: "/v2/",               // path to health check
   368  						Port: intstr.FromInt(5000), // port to health check
   369  					},
   370  				},
   371  			},
   372  
   373  			// Keep resources as light as possible as we aren't actually running the container's other binaries
   374  			Resources: corev1.ResourceRequirements{
   375  				Requests: corev1.ResourceList{
   376  					corev1.ResourceCPU:    injectorRequestedCPU,
   377  					corev1.ResourceMemory: injectorRequestedMemory,
   378  				},
   379  				Limits: corev1.ResourceList{
   380  					corev1.ResourceCPU:    injectorLimitCPU,
   381  					corev1.ResourceMemory: injectorLimitMemory,
   382  				},
   383  			},
   384  		},
   385  	}
   386  
   387  	pod.Spec.Volumes = []corev1.Volume{
   388  		// Contains the rust binary and collection of configmaps from the tarball (seed image).
   389  		{
   390  			Name: "init",
   391  			VolumeSource: corev1.VolumeSource{
   392  				ConfigMap: &corev1.ConfigMapVolumeSource{
   393  					LocalObjectReference: corev1.LocalObjectReference{
   394  						Name: "rust-binary",
   395  					},
   396  					DefaultMode: &executeMode,
   397  				},
   398  			},
   399  		},
   400  		// Empty directory to hold the seed image (new dir to avoid permission issues)
   401  		{
   402  			Name: "seed",
   403  			VolumeSource: corev1.VolumeSource{
   404  				EmptyDir: &corev1.EmptyDirVolumeSource{},
   405  			},
   406  		},
   407  	}
   408  
   409  	// Iterate over all the payload configmaps and add their mounts.
   410  	for _, filename := range payloadConfigmaps {
   411  		// Create the configmap volume from the given filename.
   412  		pod.Spec.Volumes = append(pod.Spec.Volumes, corev1.Volume{
   413  			Name: filename,
   414  			VolumeSource: corev1.VolumeSource{
   415  				ConfigMap: &corev1.ConfigMapVolumeSource{
   416  					LocalObjectReference: corev1.LocalObjectReference{
   417  						Name: filename,
   418  					},
   419  				},
   420  			},
   421  		})
   422  
   423  		// Create the volume mount to place the new volume in the working directory
   424  		pod.Spec.Containers[0].VolumeMounts = append(pod.Spec.Containers[0].VolumeMounts, corev1.VolumeMount{
   425  			Name:      filename,
   426  			MountPath: fmt.Sprintf("/jackal-init/%s", filename),
   427  			SubPath:   filename,
   428  		})
   429  	}
   430  
   431  	return pod, nil
   432  }
   433  
   434  // GetImagesFromAvailableNodes checks for images on schedulable nodes within a cluster and returns
   435  func (c *Cluster) getImagesAndNodesForInjection(timeoutDuration time.Duration) (imageNodeMap, error) {
   436  	timeout := time.After(timeoutDuration)
   437  	result := make(imageNodeMap)
   438  
   439  	for {
   440  		select {
   441  
   442  		// On timeout abort
   443  		case <-timeout:
   444  			return nil, fmt.Errorf("get image list timed-out")
   445  
   446  		// After delay, try running
   447  		default:
   448  			pods, err := c.GetPods(corev1.NamespaceAll)
   449  			if err != nil {
   450  				return nil, fmt.Errorf("unable to get the list of pods in the cluster")
   451  			}
   452  
   453  		findImages:
   454  			for _, pod := range pods.Items {
   455  				nodeName := pod.Spec.NodeName
   456  
   457  				// If this pod doesn't have a node (i.e. is Pending), skip it
   458  				if nodeName == "" {
   459  					continue
   460  				}
   461  
   462  				nodeDetails, err := c.GetNode(nodeName)
   463  
   464  				if err != nil {
   465  					return nil, fmt.Errorf("unable to get the node %s", pod.Spec.NodeName)
   466  				}
   467  
   468  				if nodeDetails.Status.Allocatable.Cpu().Cmp(injectorRequestedCPU) < 0 ||
   469  					nodeDetails.Status.Allocatable.Memory().Cmp(injectorRequestedMemory) < 0 {
   470  					continue findImages
   471  				}
   472  
   473  				for _, taint := range nodeDetails.Spec.Taints {
   474  					if taint.Effect == corev1.TaintEffectNoSchedule || taint.Effect == corev1.TaintEffectNoExecute {
   475  						continue findImages
   476  					}
   477  				}
   478  
   479  				for _, container := range pod.Spec.InitContainers {
   480  					result[container.Image] = append(result[container.Image], nodeName)
   481  				}
   482  
   483  				for _, container := range pod.Spec.Containers {
   484  					result[container.Image] = append(result[container.Image], nodeName)
   485  				}
   486  
   487  				for _, container := range pod.Spec.EphemeralContainers {
   488  					result[container.Image] = append(result[container.Image], nodeName)
   489  				}
   490  			}
   491  		}
   492  
   493  		if len(result) < 1 {
   494  			c.Log("no images found: %w")
   495  			time.Sleep(2 * time.Second)
   496  		} else {
   497  			return result, nil
   498  		}
   499  	}
   500  }