github.com/Racer159/jackal@v0.32.7-0.20240401174413-0bd2339e4f2e/src/internal/packager/images/pull.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // SPDX-FileCopyrightText: 2021-Present The Jackal Authors
     3  
     4  // Package images provides functions for building and pushing images.
     5  package images
     6  
     7  import (
     8  	"context"
     9  	"errors"
    10  	"fmt"
    11  	"io"
    12  	"os"
    13  	"path/filepath"
    14  	"strings"
    15  
    16  	"github.com/Racer159/jackal/src/config"
    17  	"github.com/Racer159/jackal/src/pkg/layout"
    18  	"github.com/Racer159/jackal/src/pkg/message"
    19  	"github.com/Racer159/jackal/src/pkg/transform"
    20  	"github.com/Racer159/jackal/src/pkg/utils"
    21  	"github.com/defenseunicorns/pkg/helpers"
    22  	"github.com/google/go-containerregistry/pkg/crane"
    23  	"github.com/google/go-containerregistry/pkg/logs"
    24  	"github.com/google/go-containerregistry/pkg/name"
    25  	v1 "github.com/google/go-containerregistry/pkg/v1"
    26  	"github.com/google/go-containerregistry/pkg/v1/cache"
    27  	"github.com/google/go-containerregistry/pkg/v1/daemon"
    28  	"github.com/google/go-containerregistry/pkg/v1/empty"
    29  	clayout "github.com/google/go-containerregistry/pkg/v1/layout"
    30  	"github.com/google/go-containerregistry/pkg/v1/partial"
    31  	"github.com/google/go-containerregistry/pkg/v1/stream"
    32  	"github.com/moby/moby/client"
    33  )
    34  
    35  // ImgInfo wraps references/information about an image
    36  type ImgInfo struct {
    37  	RefInfo        transform.Image
    38  	Img            v1.Image
    39  	HasImageLayers bool
    40  }
    41  
    42  // PullAll pulls all of the images in the provided tag map.
    43  func (i *ImageConfig) PullAll() ([]ImgInfo, error) {
    44  	var (
    45  		longer            string
    46  		imageCount        = len(i.ImageList)
    47  		refInfoToImage    = map[transform.Image]v1.Image{}
    48  		referenceToDigest = make(map[string]string)
    49  		imgInfoList       []ImgInfo
    50  	)
    51  
    52  	type digestInfo struct {
    53  		refInfo transform.Image
    54  		digest  string
    55  	}
    56  
    57  	// Give some additional user feedback on larger image sets
    58  	if imageCount > 15 {
    59  		longer = "This step may take a couple of minutes to complete."
    60  	} else if imageCount > 5 {
    61  		longer = "This step may take several seconds to complete."
    62  	}
    63  
    64  	spinner := message.NewProgressSpinner("Loading metadata for %d images. %s", imageCount, longer)
    65  	defer spinner.Stop()
    66  
    67  	logs.Warn.SetOutput(&message.DebugWriter{})
    68  	logs.Progress.SetOutput(&message.DebugWriter{})
    69  
    70  	metadataImageConcurrency := helpers.NewConcurrencyTools[ImgInfo, error](len(i.ImageList))
    71  
    72  	defer metadataImageConcurrency.Cancel()
    73  
    74  	spinner.Updatef("Fetching image metadata (0 of %d)", len(i.ImageList))
    75  
    76  	// Spawn a goroutine for each image to load its metadata
    77  	for _, refInfo := range i.ImageList {
    78  		// Create a closure so that we can pass the src into the goroutine
    79  		refInfo := refInfo
    80  		go func() {
    81  
    82  			if metadataImageConcurrency.IsDone() {
    83  				return
    84  			}
    85  
    86  			actualSrc := refInfo.Reference
    87  			for k, v := range i.RegistryOverrides {
    88  				if strings.HasPrefix(refInfo.Reference, k) {
    89  					actualSrc = strings.Replace(refInfo.Reference, k, v, 1)
    90  				}
    91  			}
    92  
    93  			if metadataImageConcurrency.IsDone() {
    94  				return
    95  			}
    96  
    97  			img, hasImageLayers, err := i.PullImage(actualSrc, spinner)
    98  			if err != nil {
    99  				metadataImageConcurrency.ErrorChan <- fmt.Errorf("failed to pull %s: %w", actualSrc, err)
   100  				return
   101  			}
   102  
   103  			if metadataImageConcurrency.IsDone() {
   104  				return
   105  			}
   106  
   107  			metadataImageConcurrency.ProgressChan <- ImgInfo{RefInfo: refInfo, Img: img, HasImageLayers: hasImageLayers}
   108  		}()
   109  	}
   110  
   111  	onMetadataProgress := func(finishedImage ImgInfo, iteration int) {
   112  		spinner.Updatef("Fetching image metadata (%d of %d): %s", iteration+1, len(i.ImageList), finishedImage.RefInfo.Reference)
   113  		refInfoToImage[finishedImage.RefInfo] = finishedImage.Img
   114  		imgInfoList = append(imgInfoList, finishedImage)
   115  	}
   116  
   117  	onMetadataError := func(err error) error {
   118  		return err
   119  	}
   120  
   121  	if err := metadataImageConcurrency.WaitWithProgress(onMetadataProgress, onMetadataError); err != nil {
   122  		return nil, err
   123  	}
   124  
   125  	// Create the ImagePath directory
   126  	if err := helpers.CreateDirectory(i.ImagesPath, helpers.ReadExecuteAllWriteUser); err != nil {
   127  		return nil, fmt.Errorf("failed to create image path %s: %w", i.ImagesPath, err)
   128  	}
   129  
   130  	totalBytes := int64(0)
   131  	processedLayers := make(map[string]v1.Layer)
   132  	for refInfo, img := range refInfoToImage {
   133  		// Get the byte size for this image
   134  		layers, err := img.Layers()
   135  		if err != nil {
   136  			return nil, fmt.Errorf("unable to get layers for image %s: %w", refInfo.Reference, err)
   137  		}
   138  		for _, layer := range layers {
   139  			layerDigest, err := layer.Digest()
   140  			if err != nil {
   141  				return nil, fmt.Errorf("unable to get digest for image layer: %w", err)
   142  			}
   143  
   144  			// Only calculate this layer size if we haven't already looked at it
   145  			if _, ok := processedLayers[layerDigest.Hex]; !ok {
   146  				size, err := layer.Size()
   147  				if err != nil {
   148  					return nil, fmt.Errorf("unable to get size of layer: %w", err)
   149  				}
   150  				totalBytes += size
   151  				processedLayers[layerDigest.Hex] = layer
   152  			}
   153  
   154  		}
   155  	}
   156  	spinner.Updatef("Preparing image sources and cache for image pulling")
   157  
   158  	// Create special sauce crane Path object
   159  	// If it already exists use it
   160  	cranePath, err := clayout.FromPath(i.ImagesPath)
   161  	// Use crane pattern for creating OCI layout if it doesn't exist
   162  	if err != nil {
   163  		// If it doesn't exist create it
   164  		cranePath, err = clayout.Write(i.ImagesPath, empty.Index)
   165  		if err != nil {
   166  			return nil, err
   167  		}
   168  	}
   169  
   170  	for refInfo, img := range refInfoToImage {
   171  		imgDigest, err := img.Digest()
   172  		if err != nil {
   173  			return nil, fmt.Errorf("unable to get digest for image %s: %w", refInfo.Reference, err)
   174  		}
   175  		referenceToDigest[refInfo.Reference] = imgDigest.String()
   176  	}
   177  
   178  	spinner.Success()
   179  
   180  	// Create a thread to update a progress bar as we save the image files to disk
   181  	doneSaving := make(chan error)
   182  	updateText := fmt.Sprintf("Pulling %d images", imageCount)
   183  	go utils.RenderProgressBarForLocalDirWrite(i.ImagesPath, totalBytes, doneSaving, updateText, updateText)
   184  
   185  	// Spawn a goroutine for each layer to write it to disk using crane
   186  
   187  	layerWritingConcurrency := helpers.NewConcurrencyTools[bool, error](len(processedLayers))
   188  
   189  	defer layerWritingConcurrency.Cancel()
   190  
   191  	for _, layer := range processedLayers {
   192  		layer := layer
   193  		// Function is a combination of https://github.com/google/go-containerregistry/blob/v0.15.2/pkg/v1/layout/write.go#L270-L305
   194  		// and https://github.com/google/go-containerregistry/blob/v0.15.2/pkg/v1/layout/write.go#L198-L262
   195  		// with modifications. This allows us to dedupe layers for all images and write them concurrently.
   196  		go func() {
   197  			digest, err := layer.Digest()
   198  			if errors.Is(err, stream.ErrNotComputed) {
   199  				// Allow digest errors, since streams may not have calculated the hash
   200  				// yet. Instead, use an empty value, which will be transformed into a
   201  				// random file name with `os.CreateTemp` and the final digest will be
   202  				// calculated after writing to a temp file and before renaming to the
   203  				// final path.
   204  				digest = v1.Hash{Algorithm: "sha256", Hex: ""}
   205  			} else if err != nil {
   206  				layerWritingConcurrency.ErrorChan <- err
   207  				return
   208  			}
   209  
   210  			size, err := layer.Size()
   211  			if errors.Is(err, stream.ErrNotComputed) {
   212  				// Allow size errors, since streams may not have calculated the size
   213  				// yet. Instead, use -1 as a sentinel value meaning that no size
   214  				// comparison can be done and any sized blob file should be considered
   215  				// valid and not overwritten.
   216  				//
   217  				// TODO: Provide an option to always overwrite blobs.
   218  				size = -1
   219  			} else if err != nil {
   220  				layerWritingConcurrency.ErrorChan <- err
   221  				return
   222  			}
   223  
   224  			if layerWritingConcurrency.IsDone() {
   225  				return
   226  			}
   227  
   228  			readCloser, err := layer.Compressed()
   229  			if err != nil {
   230  				layerWritingConcurrency.ErrorChan <- err
   231  				return
   232  			}
   233  
   234  			// Create the directory for the blob if it doesn't exist
   235  			dir := filepath.Join(string(cranePath), "blobs", digest.Algorithm)
   236  			if err := helpers.CreateDirectory(dir, os.ModePerm); err != nil {
   237  				layerWritingConcurrency.ErrorChan <- err
   238  				return
   239  			}
   240  
   241  			if layerWritingConcurrency.IsDone() {
   242  				return
   243  			}
   244  
   245  			// Check if blob already exists and is the correct size
   246  			file := filepath.Join(dir, digest.Hex)
   247  			if s, err := os.Stat(file); err == nil && !s.IsDir() && (s.Size() == size || size == -1) {
   248  				layerWritingConcurrency.ProgressChan <- true
   249  				return
   250  			}
   251  
   252  			if layerWritingConcurrency.IsDone() {
   253  				return
   254  			}
   255  
   256  			// Write to a temporary file
   257  			w, err := os.CreateTemp(dir, digest.Hex)
   258  			if err != nil {
   259  				layerWritingConcurrency.ErrorChan <- err
   260  				return
   261  			}
   262  			// Delete temp file if an error is encountered before renaming
   263  			defer func() {
   264  				if err := os.Remove(w.Name()); err != nil && !errors.Is(err, os.ErrNotExist) {
   265  					message.Warnf("error removing temporary file after encountering an error while writing blob: %v", err)
   266  				}
   267  			}()
   268  
   269  			defer w.Close()
   270  
   271  			if layerWritingConcurrency.IsDone() {
   272  				return
   273  			}
   274  
   275  			// Write to file rename
   276  			if n, err := io.Copy(w, readCloser); err != nil {
   277  				layerWritingConcurrency.ErrorChan <- err
   278  				return
   279  			} else if size != -1 && n != size {
   280  				layerWritingConcurrency.ErrorChan <- fmt.Errorf("expected blob size %d, but only wrote %d", size, n)
   281  				return
   282  			}
   283  
   284  			if layerWritingConcurrency.IsDone() {
   285  				return
   286  			}
   287  
   288  			// Always close reader before renaming, since Close computes the digest in
   289  			// the case of streaming layers. If Close is not called explicitly, it will
   290  			// occur in a goroutine that is not guaranteed to succeed before renamer is
   291  			// called. When renamer is the layer's Digest method, it can return
   292  			// ErrNotComputed.
   293  			if err := readCloser.Close(); err != nil {
   294  				layerWritingConcurrency.ErrorChan <- err
   295  				return
   296  			}
   297  
   298  			// Always close file before renaming
   299  			if err := w.Close(); err != nil {
   300  				layerWritingConcurrency.ErrorChan <- err
   301  				return
   302  			}
   303  
   304  			// Rename file based on the final hash
   305  			renamePath := filepath.Join(string(cranePath), "blobs", digest.Algorithm, digest.Hex)
   306  			os.Rename(w.Name(), renamePath)
   307  
   308  			if layerWritingConcurrency.IsDone() {
   309  				return
   310  			}
   311  
   312  			layerWritingConcurrency.ProgressChan <- true
   313  		}()
   314  	}
   315  
   316  	onLayerWritingError := func(err error) error {
   317  		// Send a signal to the progress bar that we're done and wait for the thread to finish
   318  		doneSaving <- err
   319  		<-doneSaving
   320  		message.WarnErr(err, "Failed to write image layers, trying again up to 3 times...")
   321  		if strings.HasPrefix(err.Error(), "expected blob size") {
   322  			message.Warnf("Potential image cache corruption: %s - try clearing cache with \"jackal tools clear-cache\"", err.Error())
   323  		}
   324  		return err
   325  	}
   326  
   327  	if err := layerWritingConcurrency.WaitWithoutProgress(onLayerWritingError); err != nil {
   328  		return nil, err
   329  	}
   330  
   331  	imageSavingConcurrency := helpers.NewConcurrencyTools[digestInfo, error](len(refInfoToImage))
   332  
   333  	defer imageSavingConcurrency.Cancel()
   334  
   335  	// Spawn a goroutine for each image to write it's config and manifest to disk using crane
   336  	// All layers should already be in place so this should be extremely fast
   337  	for refInfo, img := range refInfoToImage {
   338  		// Create a closure so that we can pass the refInfo and img into the goroutine
   339  		refInfo, img := refInfo, img
   340  		go func() {
   341  			// Save the image via crane
   342  			err := cranePath.WriteImage(img)
   343  
   344  			if imageSavingConcurrency.IsDone() {
   345  				return
   346  			}
   347  
   348  			if err != nil {
   349  				// Check if the cache has been invalidated, and warn the user if so
   350  				if strings.HasPrefix(err.Error(), "error writing layer: expected blob size") {
   351  					message.Warnf("Potential image cache corruption: %s - try clearing cache with \"jackal tools clear-cache\"", err.Error())
   352  				}
   353  				imageSavingConcurrency.ErrorChan <- fmt.Errorf("error when trying to save the img (%s): %w", refInfo.Reference, err)
   354  				return
   355  			}
   356  
   357  			if imageSavingConcurrency.IsDone() {
   358  				return
   359  			}
   360  
   361  			// Get the image digest so we can set an annotation in the image.json later
   362  			imgDigest, err := img.Digest()
   363  			if err != nil {
   364  				imageSavingConcurrency.ErrorChan <- err
   365  				return
   366  			}
   367  
   368  			if imageSavingConcurrency.IsDone() {
   369  				return
   370  			}
   371  
   372  			imageSavingConcurrency.ProgressChan <- digestInfo{digest: imgDigest.String(), refInfo: refInfo}
   373  		}()
   374  	}
   375  
   376  	onImageSavingProgress := func(finishedImage digestInfo, _ int) {
   377  		referenceToDigest[finishedImage.refInfo.Reference] = finishedImage.digest
   378  	}
   379  
   380  	onImageSavingError := func(err error) error {
   381  		// Send a signal to the progress bar that we're done and wait for the thread to finish
   382  		doneSaving <- err
   383  		<-doneSaving
   384  		message.WarnErr(err, "Failed to write image config or manifest, trying again up to 3 times...")
   385  		return err
   386  	}
   387  
   388  	if err := imageSavingConcurrency.WaitWithProgress(onImageSavingProgress, onImageSavingError); err != nil {
   389  		return nil, err
   390  	}
   391  
   392  	// for every image sequentially append OCI descriptor
   393  
   394  	for refInfo, img := range refInfoToImage {
   395  		desc, err := partial.Descriptor(img)
   396  		if err != nil {
   397  			return nil, err
   398  		}
   399  
   400  		cranePath.AppendDescriptor(*desc)
   401  		if err != nil {
   402  			return nil, err
   403  		}
   404  
   405  		imgDigest, err := img.Digest()
   406  		if err != nil {
   407  			return nil, err
   408  		}
   409  
   410  		referenceToDigest[refInfo.Reference] = imgDigest.String()
   411  	}
   412  
   413  	if err := utils.AddImageNameAnnotation(i.ImagesPath, referenceToDigest); err != nil {
   414  		return nil, fmt.Errorf("unable to format OCI layout: %w", err)
   415  	}
   416  
   417  	// Send a signal to the progress bar that we're done and wait for the thread to finish
   418  	doneSaving <- nil
   419  	<-doneSaving
   420  
   421  	return imgInfoList, nil
   422  }
   423  
   424  // PullImage returns a v1.Image either by loading a local tarball or pulling from the wider internet.
   425  func (i *ImageConfig) PullImage(src string, spinner *message.Spinner) (img v1.Image, hasImageLayers bool, err error) {
   426  	cacheImage := false
   427  	// Load image tarballs from the local filesystem.
   428  	if strings.HasSuffix(src, ".tar") || strings.HasSuffix(src, ".tar.gz") || strings.HasSuffix(src, ".tgz") {
   429  		spinner.Updatef("Reading image tarball: %s", src)
   430  		img, err = crane.Load(src, config.GetCraneOptions(true, i.Architectures...)...)
   431  		if err != nil {
   432  			return nil, false, err
   433  		}
   434  	} else if _, err := crane.Manifest(src, config.GetCraneOptions(i.Insecure, i.Architectures...)...); err != nil {
   435  		// If crane is unable to pull the image, try to load it from the local docker daemon.
   436  		message.Notef("Falling back to local 'docker' images, failed to find the manifest on a remote: %s", err.Error())
   437  
   438  		// Parse the image reference to get the image name.
   439  		reference, err := name.ParseReference(src)
   440  		if err != nil {
   441  			return nil, false, fmt.Errorf("failed to parse image reference: %w", err)
   442  		}
   443  
   444  		// Attempt to connect to the local docker daemon.
   445  		ctx := context.TODO()
   446  		cli, err := client.NewClientWithOpts(client.FromEnv)
   447  		if err != nil {
   448  			return nil, false, fmt.Errorf("docker not available: %w", err)
   449  		}
   450  		cli.NegotiateAPIVersion(ctx)
   451  
   452  		// Inspect the image to get the size.
   453  		rawImg, _, err := cli.ImageInspectWithRaw(ctx, src)
   454  		if err != nil {
   455  			return nil, false, fmt.Errorf("failed to inspect image via docker: %w", err)
   456  		}
   457  
   458  		// Warn the user if the image is large.
   459  		if rawImg.Size > 750*1000*1000 {
   460  			message.Warnf("%s is %s and may take a very long time to load via docker. "+
   461  				"See https://docs.jackal.dev/docs/faq for suggestions on how to improve large local image loading operations.",
   462  				src, utils.ByteFormat(float64(rawImg.Size), 2))
   463  		}
   464  
   465  		// Use unbuffered opener to avoid OOM Kill issues https://github.com/Racer159/jackal/issues/1214.
   466  		// This will also take for ever to load large images.
   467  		if img, err = daemon.Image(reference, daemon.WithUnbufferedOpener()); err != nil {
   468  			return nil, false, fmt.Errorf("failed to load image from docker daemon: %w", err)
   469  		}
   470  	} else {
   471  		// Manifest was found, so use crane to pull the image.
   472  		if img, err = crane.Pull(src, config.GetCraneOptions(i.Insecure, i.Architectures...)...); err != nil {
   473  			return nil, false, fmt.Errorf("failed to pull image: %w", err)
   474  		}
   475  		cacheImage = true
   476  	}
   477  
   478  	hasImageLayers, err = utils.HasImageLayers(img)
   479  	if err != nil {
   480  		return nil, false, fmt.Errorf("failed to check image layer mediatype: %w", err)
   481  	}
   482  
   483  	if hasImageLayers && cacheImage {
   484  		spinner.Updatef("Preparing image %s", src)
   485  		imageCachePath := filepath.Join(config.GetAbsCachePath(), layout.ImagesDir)
   486  		img = cache.Image(img, cache.NewFilesystemCache(imageCachePath))
   487  	}
   488  
   489  	return img, hasImageLayers, nil
   490  
   491  }