github.com/Racer159/jackal@v0.32.7-0.20240401174413-0bd2339e4f2e/src/internal/packager/images/pull.go (about) 1 // SPDX-License-Identifier: Apache-2.0 2 // SPDX-FileCopyrightText: 2021-Present The Jackal Authors 3 4 // Package images provides functions for building and pushing images. 5 package images 6 7 import ( 8 "context" 9 "errors" 10 "fmt" 11 "io" 12 "os" 13 "path/filepath" 14 "strings" 15 16 "github.com/Racer159/jackal/src/config" 17 "github.com/Racer159/jackal/src/pkg/layout" 18 "github.com/Racer159/jackal/src/pkg/message" 19 "github.com/Racer159/jackal/src/pkg/transform" 20 "github.com/Racer159/jackal/src/pkg/utils" 21 "github.com/defenseunicorns/pkg/helpers" 22 "github.com/google/go-containerregistry/pkg/crane" 23 "github.com/google/go-containerregistry/pkg/logs" 24 "github.com/google/go-containerregistry/pkg/name" 25 v1 "github.com/google/go-containerregistry/pkg/v1" 26 "github.com/google/go-containerregistry/pkg/v1/cache" 27 "github.com/google/go-containerregistry/pkg/v1/daemon" 28 "github.com/google/go-containerregistry/pkg/v1/empty" 29 clayout "github.com/google/go-containerregistry/pkg/v1/layout" 30 "github.com/google/go-containerregistry/pkg/v1/partial" 31 "github.com/google/go-containerregistry/pkg/v1/stream" 32 "github.com/moby/moby/client" 33 ) 34 35 // ImgInfo wraps references/information about an image 36 type ImgInfo struct { 37 RefInfo transform.Image 38 Img v1.Image 39 HasImageLayers bool 40 } 41 42 // PullAll pulls all of the images in the provided tag map. 43 func (i *ImageConfig) PullAll() ([]ImgInfo, error) { 44 var ( 45 longer string 46 imageCount = len(i.ImageList) 47 refInfoToImage = map[transform.Image]v1.Image{} 48 referenceToDigest = make(map[string]string) 49 imgInfoList []ImgInfo 50 ) 51 52 type digestInfo struct { 53 refInfo transform.Image 54 digest string 55 } 56 57 // Give some additional user feedback on larger image sets 58 if imageCount > 15 { 59 longer = "This step may take a couple of minutes to complete." 60 } else if imageCount > 5 { 61 longer = "This step may take several seconds to complete." 62 } 63 64 spinner := message.NewProgressSpinner("Loading metadata for %d images. %s", imageCount, longer) 65 defer spinner.Stop() 66 67 logs.Warn.SetOutput(&message.DebugWriter{}) 68 logs.Progress.SetOutput(&message.DebugWriter{}) 69 70 metadataImageConcurrency := helpers.NewConcurrencyTools[ImgInfo, error](len(i.ImageList)) 71 72 defer metadataImageConcurrency.Cancel() 73 74 spinner.Updatef("Fetching image metadata (0 of %d)", len(i.ImageList)) 75 76 // Spawn a goroutine for each image to load its metadata 77 for _, refInfo := range i.ImageList { 78 // Create a closure so that we can pass the src into the goroutine 79 refInfo := refInfo 80 go func() { 81 82 if metadataImageConcurrency.IsDone() { 83 return 84 } 85 86 actualSrc := refInfo.Reference 87 for k, v := range i.RegistryOverrides { 88 if strings.HasPrefix(refInfo.Reference, k) { 89 actualSrc = strings.Replace(refInfo.Reference, k, v, 1) 90 } 91 } 92 93 if metadataImageConcurrency.IsDone() { 94 return 95 } 96 97 img, hasImageLayers, err := i.PullImage(actualSrc, spinner) 98 if err != nil { 99 metadataImageConcurrency.ErrorChan <- fmt.Errorf("failed to pull %s: %w", actualSrc, err) 100 return 101 } 102 103 if metadataImageConcurrency.IsDone() { 104 return 105 } 106 107 metadataImageConcurrency.ProgressChan <- ImgInfo{RefInfo: refInfo, Img: img, HasImageLayers: hasImageLayers} 108 }() 109 } 110 111 onMetadataProgress := func(finishedImage ImgInfo, iteration int) { 112 spinner.Updatef("Fetching image metadata (%d of %d): %s", iteration+1, len(i.ImageList), finishedImage.RefInfo.Reference) 113 refInfoToImage[finishedImage.RefInfo] = finishedImage.Img 114 imgInfoList = append(imgInfoList, finishedImage) 115 } 116 117 onMetadataError := func(err error) error { 118 return err 119 } 120 121 if err := metadataImageConcurrency.WaitWithProgress(onMetadataProgress, onMetadataError); err != nil { 122 return nil, err 123 } 124 125 // Create the ImagePath directory 126 if err := helpers.CreateDirectory(i.ImagesPath, helpers.ReadExecuteAllWriteUser); err != nil { 127 return nil, fmt.Errorf("failed to create image path %s: %w", i.ImagesPath, err) 128 } 129 130 totalBytes := int64(0) 131 processedLayers := make(map[string]v1.Layer) 132 for refInfo, img := range refInfoToImage { 133 // Get the byte size for this image 134 layers, err := img.Layers() 135 if err != nil { 136 return nil, fmt.Errorf("unable to get layers for image %s: %w", refInfo.Reference, err) 137 } 138 for _, layer := range layers { 139 layerDigest, err := layer.Digest() 140 if err != nil { 141 return nil, fmt.Errorf("unable to get digest for image layer: %w", err) 142 } 143 144 // Only calculate this layer size if we haven't already looked at it 145 if _, ok := processedLayers[layerDigest.Hex]; !ok { 146 size, err := layer.Size() 147 if err != nil { 148 return nil, fmt.Errorf("unable to get size of layer: %w", err) 149 } 150 totalBytes += size 151 processedLayers[layerDigest.Hex] = layer 152 } 153 154 } 155 } 156 spinner.Updatef("Preparing image sources and cache for image pulling") 157 158 // Create special sauce crane Path object 159 // If it already exists use it 160 cranePath, err := clayout.FromPath(i.ImagesPath) 161 // Use crane pattern for creating OCI layout if it doesn't exist 162 if err != nil { 163 // If it doesn't exist create it 164 cranePath, err = clayout.Write(i.ImagesPath, empty.Index) 165 if err != nil { 166 return nil, err 167 } 168 } 169 170 for refInfo, img := range refInfoToImage { 171 imgDigest, err := img.Digest() 172 if err != nil { 173 return nil, fmt.Errorf("unable to get digest for image %s: %w", refInfo.Reference, err) 174 } 175 referenceToDigest[refInfo.Reference] = imgDigest.String() 176 } 177 178 spinner.Success() 179 180 // Create a thread to update a progress bar as we save the image files to disk 181 doneSaving := make(chan error) 182 updateText := fmt.Sprintf("Pulling %d images", imageCount) 183 go utils.RenderProgressBarForLocalDirWrite(i.ImagesPath, totalBytes, doneSaving, updateText, updateText) 184 185 // Spawn a goroutine for each layer to write it to disk using crane 186 187 layerWritingConcurrency := helpers.NewConcurrencyTools[bool, error](len(processedLayers)) 188 189 defer layerWritingConcurrency.Cancel() 190 191 for _, layer := range processedLayers { 192 layer := layer 193 // Function is a combination of https://github.com/google/go-containerregistry/blob/v0.15.2/pkg/v1/layout/write.go#L270-L305 194 // and https://github.com/google/go-containerregistry/blob/v0.15.2/pkg/v1/layout/write.go#L198-L262 195 // with modifications. This allows us to dedupe layers for all images and write them concurrently. 196 go func() { 197 digest, err := layer.Digest() 198 if errors.Is(err, stream.ErrNotComputed) { 199 // Allow digest errors, since streams may not have calculated the hash 200 // yet. Instead, use an empty value, which will be transformed into a 201 // random file name with `os.CreateTemp` and the final digest will be 202 // calculated after writing to a temp file and before renaming to the 203 // final path. 204 digest = v1.Hash{Algorithm: "sha256", Hex: ""} 205 } else if err != nil { 206 layerWritingConcurrency.ErrorChan <- err 207 return 208 } 209 210 size, err := layer.Size() 211 if errors.Is(err, stream.ErrNotComputed) { 212 // Allow size errors, since streams may not have calculated the size 213 // yet. Instead, use -1 as a sentinel value meaning that no size 214 // comparison can be done and any sized blob file should be considered 215 // valid and not overwritten. 216 // 217 // TODO: Provide an option to always overwrite blobs. 218 size = -1 219 } else if err != nil { 220 layerWritingConcurrency.ErrorChan <- err 221 return 222 } 223 224 if layerWritingConcurrency.IsDone() { 225 return 226 } 227 228 readCloser, err := layer.Compressed() 229 if err != nil { 230 layerWritingConcurrency.ErrorChan <- err 231 return 232 } 233 234 // Create the directory for the blob if it doesn't exist 235 dir := filepath.Join(string(cranePath), "blobs", digest.Algorithm) 236 if err := helpers.CreateDirectory(dir, os.ModePerm); err != nil { 237 layerWritingConcurrency.ErrorChan <- err 238 return 239 } 240 241 if layerWritingConcurrency.IsDone() { 242 return 243 } 244 245 // Check if blob already exists and is the correct size 246 file := filepath.Join(dir, digest.Hex) 247 if s, err := os.Stat(file); err == nil && !s.IsDir() && (s.Size() == size || size == -1) { 248 layerWritingConcurrency.ProgressChan <- true 249 return 250 } 251 252 if layerWritingConcurrency.IsDone() { 253 return 254 } 255 256 // Write to a temporary file 257 w, err := os.CreateTemp(dir, digest.Hex) 258 if err != nil { 259 layerWritingConcurrency.ErrorChan <- err 260 return 261 } 262 // Delete temp file if an error is encountered before renaming 263 defer func() { 264 if err := os.Remove(w.Name()); err != nil && !errors.Is(err, os.ErrNotExist) { 265 message.Warnf("error removing temporary file after encountering an error while writing blob: %v", err) 266 } 267 }() 268 269 defer w.Close() 270 271 if layerWritingConcurrency.IsDone() { 272 return 273 } 274 275 // Write to file rename 276 if n, err := io.Copy(w, readCloser); err != nil { 277 layerWritingConcurrency.ErrorChan <- err 278 return 279 } else if size != -1 && n != size { 280 layerWritingConcurrency.ErrorChan <- fmt.Errorf("expected blob size %d, but only wrote %d", size, n) 281 return 282 } 283 284 if layerWritingConcurrency.IsDone() { 285 return 286 } 287 288 // Always close reader before renaming, since Close computes the digest in 289 // the case of streaming layers. If Close is not called explicitly, it will 290 // occur in a goroutine that is not guaranteed to succeed before renamer is 291 // called. When renamer is the layer's Digest method, it can return 292 // ErrNotComputed. 293 if err := readCloser.Close(); err != nil { 294 layerWritingConcurrency.ErrorChan <- err 295 return 296 } 297 298 // Always close file before renaming 299 if err := w.Close(); err != nil { 300 layerWritingConcurrency.ErrorChan <- err 301 return 302 } 303 304 // Rename file based on the final hash 305 renamePath := filepath.Join(string(cranePath), "blobs", digest.Algorithm, digest.Hex) 306 os.Rename(w.Name(), renamePath) 307 308 if layerWritingConcurrency.IsDone() { 309 return 310 } 311 312 layerWritingConcurrency.ProgressChan <- true 313 }() 314 } 315 316 onLayerWritingError := func(err error) error { 317 // Send a signal to the progress bar that we're done and wait for the thread to finish 318 doneSaving <- err 319 <-doneSaving 320 message.WarnErr(err, "Failed to write image layers, trying again up to 3 times...") 321 if strings.HasPrefix(err.Error(), "expected blob size") { 322 message.Warnf("Potential image cache corruption: %s - try clearing cache with \"jackal tools clear-cache\"", err.Error()) 323 } 324 return err 325 } 326 327 if err := layerWritingConcurrency.WaitWithoutProgress(onLayerWritingError); err != nil { 328 return nil, err 329 } 330 331 imageSavingConcurrency := helpers.NewConcurrencyTools[digestInfo, error](len(refInfoToImage)) 332 333 defer imageSavingConcurrency.Cancel() 334 335 // Spawn a goroutine for each image to write it's config and manifest to disk using crane 336 // All layers should already be in place so this should be extremely fast 337 for refInfo, img := range refInfoToImage { 338 // Create a closure so that we can pass the refInfo and img into the goroutine 339 refInfo, img := refInfo, img 340 go func() { 341 // Save the image via crane 342 err := cranePath.WriteImage(img) 343 344 if imageSavingConcurrency.IsDone() { 345 return 346 } 347 348 if err != nil { 349 // Check if the cache has been invalidated, and warn the user if so 350 if strings.HasPrefix(err.Error(), "error writing layer: expected blob size") { 351 message.Warnf("Potential image cache corruption: %s - try clearing cache with \"jackal tools clear-cache\"", err.Error()) 352 } 353 imageSavingConcurrency.ErrorChan <- fmt.Errorf("error when trying to save the img (%s): %w", refInfo.Reference, err) 354 return 355 } 356 357 if imageSavingConcurrency.IsDone() { 358 return 359 } 360 361 // Get the image digest so we can set an annotation in the image.json later 362 imgDigest, err := img.Digest() 363 if err != nil { 364 imageSavingConcurrency.ErrorChan <- err 365 return 366 } 367 368 if imageSavingConcurrency.IsDone() { 369 return 370 } 371 372 imageSavingConcurrency.ProgressChan <- digestInfo{digest: imgDigest.String(), refInfo: refInfo} 373 }() 374 } 375 376 onImageSavingProgress := func(finishedImage digestInfo, _ int) { 377 referenceToDigest[finishedImage.refInfo.Reference] = finishedImage.digest 378 } 379 380 onImageSavingError := func(err error) error { 381 // Send a signal to the progress bar that we're done and wait for the thread to finish 382 doneSaving <- err 383 <-doneSaving 384 message.WarnErr(err, "Failed to write image config or manifest, trying again up to 3 times...") 385 return err 386 } 387 388 if err := imageSavingConcurrency.WaitWithProgress(onImageSavingProgress, onImageSavingError); err != nil { 389 return nil, err 390 } 391 392 // for every image sequentially append OCI descriptor 393 394 for refInfo, img := range refInfoToImage { 395 desc, err := partial.Descriptor(img) 396 if err != nil { 397 return nil, err 398 } 399 400 cranePath.AppendDescriptor(*desc) 401 if err != nil { 402 return nil, err 403 } 404 405 imgDigest, err := img.Digest() 406 if err != nil { 407 return nil, err 408 } 409 410 referenceToDigest[refInfo.Reference] = imgDigest.String() 411 } 412 413 if err := utils.AddImageNameAnnotation(i.ImagesPath, referenceToDigest); err != nil { 414 return nil, fmt.Errorf("unable to format OCI layout: %w", err) 415 } 416 417 // Send a signal to the progress bar that we're done and wait for the thread to finish 418 doneSaving <- nil 419 <-doneSaving 420 421 return imgInfoList, nil 422 } 423 424 // PullImage returns a v1.Image either by loading a local tarball or pulling from the wider internet. 425 func (i *ImageConfig) PullImage(src string, spinner *message.Spinner) (img v1.Image, hasImageLayers bool, err error) { 426 cacheImage := false 427 // Load image tarballs from the local filesystem. 428 if strings.HasSuffix(src, ".tar") || strings.HasSuffix(src, ".tar.gz") || strings.HasSuffix(src, ".tgz") { 429 spinner.Updatef("Reading image tarball: %s", src) 430 img, err = crane.Load(src, config.GetCraneOptions(true, i.Architectures...)...) 431 if err != nil { 432 return nil, false, err 433 } 434 } else if _, err := crane.Manifest(src, config.GetCraneOptions(i.Insecure, i.Architectures...)...); err != nil { 435 // If crane is unable to pull the image, try to load it from the local docker daemon. 436 message.Notef("Falling back to local 'docker' images, failed to find the manifest on a remote: %s", err.Error()) 437 438 // Parse the image reference to get the image name. 439 reference, err := name.ParseReference(src) 440 if err != nil { 441 return nil, false, fmt.Errorf("failed to parse image reference: %w", err) 442 } 443 444 // Attempt to connect to the local docker daemon. 445 ctx := context.TODO() 446 cli, err := client.NewClientWithOpts(client.FromEnv) 447 if err != nil { 448 return nil, false, fmt.Errorf("docker not available: %w", err) 449 } 450 cli.NegotiateAPIVersion(ctx) 451 452 // Inspect the image to get the size. 453 rawImg, _, err := cli.ImageInspectWithRaw(ctx, src) 454 if err != nil { 455 return nil, false, fmt.Errorf("failed to inspect image via docker: %w", err) 456 } 457 458 // Warn the user if the image is large. 459 if rawImg.Size > 750*1000*1000 { 460 message.Warnf("%s is %s and may take a very long time to load via docker. "+ 461 "See https://docs.jackal.dev/docs/faq for suggestions on how to improve large local image loading operations.", 462 src, utils.ByteFormat(float64(rawImg.Size), 2)) 463 } 464 465 // Use unbuffered opener to avoid OOM Kill issues https://github.com/Racer159/jackal/issues/1214. 466 // This will also take for ever to load large images. 467 if img, err = daemon.Image(reference, daemon.WithUnbufferedOpener()); err != nil { 468 return nil, false, fmt.Errorf("failed to load image from docker daemon: %w", err) 469 } 470 } else { 471 // Manifest was found, so use crane to pull the image. 472 if img, err = crane.Pull(src, config.GetCraneOptions(i.Insecure, i.Architectures...)...); err != nil { 473 return nil, false, fmt.Errorf("failed to pull image: %w", err) 474 } 475 cacheImage = true 476 } 477 478 hasImageLayers, err = utils.HasImageLayers(img) 479 if err != nil { 480 return nil, false, fmt.Errorf("failed to check image layer mediatype: %w", err) 481 } 482 483 if hasImageLayers && cacheImage { 484 spinner.Updatef("Preparing image %s", src) 485 imageCachePath := filepath.Join(config.GetAbsCachePath(), layout.ImagesDir) 486 img = cache.Image(img, cache.NewFilesystemCache(imageCachePath)) 487 } 488 489 return img, hasImageLayers, nil 490 491 }