github.com/google/osv-scalibr@v0.4.1/artifact/image/unpack/unpack.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package unpack contains functions to unpack an image.
    16  package unpack
    17  
    18  import (
    19  	"bytes"
    20  	"errors"
    21  	"fmt"
    22  	"io"
    23  	"io/fs"
    24  	"os"
    25  	"path"
    26  	"path/filepath"
    27  	"strings"
    28  
    29  	"archive/tar"
    30  
    31  	v1 "github.com/google/go-containerregistry/pkg/v1"
    32  	"github.com/google/osv-scalibr/artifact/image/require"
    33  	"github.com/google/osv-scalibr/artifact/image/symlink"
    34  	scalibrtar "github.com/google/osv-scalibr/artifact/image/tar"
    35  	"github.com/google/osv-scalibr/log"
    36  )
    37  
const (
	// SymlinkRetain specifies that the symlink should be retained as a symlink.
	SymlinkRetain SymlinkResolution = "symlink_retain"
	// SymlinkIgnore specifies that the symlink should be ignored.
	// UnpackSquashed rejects this strategy with an error.
	SymlinkIgnore SymlinkResolution = "symlink_ignore"

	// SymlinkErrLog specifies that errors resolving symlinks are logged but not returned. Image unpacking continues.
	SymlinkErrLog SymlinkErrStrategy = "symlink_err_log"
	// SymlinkErrReturn specifies that errors resolving symlinks are returned, which stops unpacking the image.
	SymlinkErrReturn SymlinkErrStrategy = "symlink_err_return"

	// DefaultMaxPass is the default maximum number of times the image is unpacked to resolve symlinks.
	// NewUnpacker falls back to this value when UnpackerConfig.MaxPass is 0 or less.
	DefaultMaxPass = 3
	// DefaultMaxFileBytes is the default maximum size of files that will be unpacked. Larger files are ignored.
	// The max is large because some files, like kube-apiserver, are ~115MB.
	// This is the value set by DefaultUnpackerConfig.
	DefaultMaxFileBytes = 1024 * 1024 * 1024 // 1GB
)
    55  
// SymlinkResolution specifies how to resolve symlinks.
// The values declared in this package are SymlinkRetain and SymlinkIgnore.
type SymlinkResolution string

// SymlinkErrStrategy specifies how to handle errors resolving symlinks.
// The values declared in this package are SymlinkErrLog and SymlinkErrReturn.
type SymlinkErrStrategy string
    61  
// Unpacker unpacks the image.
// NewUnpacker validates a config and applies defaults before building one.
type Unpacker struct {
	// SymlinkResolution specifies how symlinks found in the image are handled.
	SymlinkResolution SymlinkResolution
	// SymlinkErrStrategy specifies whether symlink errors are logged or returned.
	SymlinkErrStrategy SymlinkErrStrategy
	// MaxPass is the number of times the tarball is read while unpacking.
	MaxPass int
	// MaxSizeBytes is the largest tar entry size that is unpacked; larger entries are skipped.
	MaxSizeBytes int64
	// Requirer decides, per file, whether the file is unpacked.
	Requirer require.FileRequirer
}
    70  
// UnpackerConfig configures how to unpack the image.
type UnpackerConfig struct {
	// SymlinkResolution specifies how to resolve symlinks. NewUnpacker rejects an empty value.
	SymlinkResolution SymlinkResolution
	// SymlinkErrStrategy specifies how to handle symlink errors. NewUnpacker rejects an empty value.
	SymlinkErrStrategy SymlinkErrStrategy
	// MaxPass limits the times the image is unpacked to resolve symlinks. 0 or less is essentially
	// "unset" and makes NewUnpacker fall back to DefaultMaxPass.
	MaxPass int
	// MaxFileBytes is the maximum size of files that will be unpacked. Larger files are ignored.
	// 0 or less is treated as "unset"; NewUnpacker then applies a 1TB limit.
	MaxFileBytes int64
	// Requirer's FileRequired function is run on each file during unpacking. The file is unpacked if true and ignored if false.
	// NewUnpacker rejects a nil Requirer.
	Requirer require.FileRequirer
}
    84  
    85  // DefaultUnpackerConfig returns default configurations for a new Unpacker.
    86  func DefaultUnpackerConfig() *UnpackerConfig {
    87  	return &UnpackerConfig{
    88  		SymlinkResolution:  SymlinkRetain,
    89  		SymlinkErrStrategy: SymlinkErrLog,
    90  		MaxPass:            DefaultMaxPass,
    91  		MaxFileBytes:       DefaultMaxFileBytes,
    92  		Requirer:           &require.FileRequirerAll{},
    93  	}
    94  }
    95  
// WithMaxPass sets MaxPass on cfg and returns the same cfg so calls can be chained.
// The receiver is modified in place; no copy is made.
func (cfg *UnpackerConfig) WithMaxPass(maxPass int) *UnpackerConfig {
	cfg.MaxPass = maxPass
	return cfg
}
   101  
// WithMaxFileBytes sets MaxFileBytes on cfg and returns the same cfg so calls can be chained.
// The receiver is modified in place; no copy is made.
func (cfg *UnpackerConfig) WithMaxFileBytes(maxFileBytes int64) *UnpackerConfig {
	cfg.MaxFileBytes = maxFileBytes
	return cfg
}
   107  
// WithSymlinkResolution sets SymlinkResolution on cfg and returns the same cfg so calls can be chained.
// The receiver is modified in place; no copy is made.
func (cfg *UnpackerConfig) WithSymlinkResolution(resolution SymlinkResolution) *UnpackerConfig {
	cfg.SymlinkResolution = resolution
	return cfg
}
   113  
// WithRequirer sets Requirer on cfg and returns the same cfg so calls can be chained.
// The receiver is modified in place; no copy is made.
func (cfg *UnpackerConfig) WithRequirer(requirer require.FileRequirer) *UnpackerConfig {
	cfg.Requirer = requirer
	return cfg
}
   119  
   120  // NewUnpacker creates a new Unpacker.
   121  func NewUnpacker(cfg *UnpackerConfig) (*Unpacker, error) {
   122  	if cfg.SymlinkResolution == "" {
   123  		return nil, errors.New("cfg.SymlinkResolution was not specified")
   124  	}
   125  	if cfg.SymlinkErrStrategy == "" {
   126  		return nil, errors.New("cfg.SymlinkErrStrategy was not specified")
   127  	}
   128  
   129  	maxPass := DefaultMaxPass
   130  	if cfg.MaxPass > 0 {
   131  		maxPass = cfg.MaxPass
   132  	}
   133  	maxFileBytes := cfg.MaxFileBytes
   134  	if cfg.MaxFileBytes <= 0 {
   135  		maxFileBytes = 1024 * 1024 * 1024 * 1024 // 1TB
   136  	}
   137  
   138  	if cfg.Requirer == nil {
   139  		return nil, errors.New("cfg.Requirer cannot be nil")
   140  	}
   141  
   142  	return &Unpacker{
   143  		SymlinkResolution:  cfg.SymlinkResolution,
   144  		SymlinkErrStrategy: cfg.SymlinkErrStrategy,
   145  		MaxPass:            maxPass,
   146  		MaxSizeBytes:       maxFileBytes,
   147  		Requirer:           cfg.Requirer,
   148  	}, nil
   149  }
   150  
   151  // UnpackSquashed squashes the layers of image then copies its contents to dir.
   152  func (u *Unpacker) UnpackSquashed(dir string, image v1.Image) error {
   153  	if u.SymlinkResolution == SymlinkIgnore {
   154  		return fmt.Errorf("symlink resolution strategy %q is not supported", u.SymlinkResolution)
   155  	}
   156  
   157  	if dir == "" {
   158  		return fmt.Errorf("dir cannot be root %q", dir)
   159  	}
   160  	if image == nil {
   161  		return errors.New("image cannot be nil")
   162  	}
   163  
   164  	tarDir, err := os.MkdirTemp("", "image-tar-tmp-*")
   165  	if err != nil {
   166  		return fmt.Errorf("failed to create temporary directory for image tar: %w", err)
   167  	}
   168  	defer func() {
   169  		if err := os.RemoveAll(tarDir); err != nil {
   170  			log.Errorf("failed to remove temporary directory for image tar %q: %v", tarDir, err)
   171  		}
   172  	}()
   173  	tarPath := filepath.Join(tarDir, "image.tar")
   174  	defer func() {
   175  		if err := os.Remove(tarPath); err != nil {
   176  			log.Errorf("failed to remove temporary tar file %q: %v", tarPath, err)
   177  		}
   178  	}()
   179  	if err := scalibrtar.SaveToTarball(tarPath, image); err != nil {
   180  		if strings.Contains(err.Error(), "invalid tar header") {
   181  			return fmt.Errorf("invalid tar header when saving image to tarball (error message %q) with %q", tarPath, err.Error())
   182  		}
   183  		return fmt.Errorf("failed to save image to tarball %q: %w", tarPath, err)
   184  	}
   185  
   186  	return u.UnpackSquashedFromTarball(dir, tarPath)
   187  }
   188  
   189  // UnpackSquashedFromTarball squashes the layers of an image from a tarball then
   190  // copies its contents to dir.
   191  func (u *Unpacker) UnpackSquashedFromTarball(dir string, tarPath string) error {
   192  	// requiredTargets stores targets that symlinks point to.
   193  	// This is needed because the symlink may be required by u.requirer, but the target may not be.
   194  	requiredTargets := make(map[string]bool)
   195  	for pass := range u.MaxPass {
   196  		finalPass := false
   197  		// Resolve symlinks on the last pass once all potential target files have been unpacked.
   198  		if pass == u.MaxPass-1 {
   199  			finalPass = true
   200  		}
   201  		reader, err := os.Open(tarPath)
   202  		if err != nil {
   203  			log.Errorf("Failed to open tarball of image at %q: %v", tarPath, err)
   204  			return fmt.Errorf("failed to open tarball of image at %q: %w", tarPath, err)
   205  		}
   206  		log.Infof("Unpacking pass %d of %d", pass+1, u.MaxPass)
   207  		requiredTargets, err = unpack(dir, reader, u.SymlinkResolution, u.SymlinkErrStrategy, u.Requirer, requiredTargets, finalPass, u.MaxSizeBytes)
   208  		_ = reader.Close()
   209  		if err != nil {
   210  			return err
   211  		}
   212  	}
   213  
   214  	// Remove symlinks that have a nonexistent destination file or nonexistent destination directory.
   215  	if err := symlink.RemoveObsoleteSymlinks(dir); err != nil {
   216  		return fmt.Errorf("failed to remove obsolete symlinks from dir %q: %w", dir, err)
   217  	}
   218  
   219  	return nil
   220  }
   221  
   222  // safeWriteFile is a helper function that uses os.Root to write to a file with the specified
   223  // permissions.
   224  func safeWriteFile(root *os.Root, path string, content []byte, perm os.FileMode) error {
   225  	// os.Root.OpenFile only supports the 9 least significant bits (0o777),
   226  	// so ensure we strip any other bits (like setuid, sticky bit, etc.)
   227  	normalizedPerm := perm & 0o777
   228  
   229  	file, err := root.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, normalizedPerm)
   230  	if err != nil {
   231  		log.Errorf("failed to open file %q: %v", path, err)
   232  		return fmt.Errorf("failed to open file %q: %w", path, err)
   233  	}
   234  
   235  	_, err = file.Write(content)
   236  	if err != nil {
   237  		log.Errorf("failed to write file %q: %v", path, err)
   238  		return fmt.Errorf("failed to write file %q: %w", path, err)
   239  	}
   240  
   241  	if err := file.Close(); err != nil {
   242  		log.Errorf("failed to close file %q: %v", path, err)
   243  		return fmt.Errorf("failed to close file %q: %w", path, err)
   244  	}
   245  	return nil
   246  }
   247  
   248  func unpack(dir string, reader io.Reader, symlinkResolution SymlinkResolution, symlinkErrStrategy SymlinkErrStrategy, requirer require.FileRequirer, requiredTargets map[string]bool, finalPass bool, maxSizeBytes int64) (map[string]bool, error) {
   249  	tarReader := tar.NewReader(reader)
   250  
   251  	root, err := os.OpenRoot(dir)
   252  	if err != nil {
   253  		return nil, fmt.Errorf("failed to open root directory: %w", err)
   254  	}
   255  	defer root.Close()
   256  
   257  	// Defensive copy of requiredTargets to avoid modifying the original.
   258  	currRequiredTargets := make(map[string]bool)
   259  	for t := range requiredTargets {
   260  		currRequiredTargets[t] = true
   261  	}
   262  
   263  	for {
   264  		header, err := tarReader.Next()
   265  		if err != nil {
   266  			if errors.Is(err, io.EOF) {
   267  				break
   268  			}
   269  			return nil, fmt.Errorf("failed to read next header in tarball: %w", err)
   270  		}
   271  
   272  		if header.Size > maxSizeBytes {
   273  			log.Infof("skipping file %q because its size (%d bytes) is larger than the max size (%d bytes)", header.Name, header.Size, maxSizeBytes)
   274  			continue
   275  		}
   276  
   277  		cleanPath := path.Clean(header.Name)
   278  		fullPath := path.Join(dir, cleanPath)
   279  
   280  		// Skip files already unpacked.
   281  		// Lstat is used instead of Stat to avoid following symlinks, because their targets may not exist yet.
   282  		if _, err = root.Lstat(fullPath); err == nil {
   283  			continue
   284  		}
   285  
   286  		// Skip files that are not required by extractors and are not targets of required symlinks.
   287  		// Try multiple paths variations
   288  		// (with parent dir, without leading slash, with leading slash). For example:
   289  		// - `fullPath`: `tmp/12345/etc/os-release`. This is used when actually writing the file to disk.
   290  		// - `cleanPath`: `etc/os-release`. This is used when checking if the file is required.
   291  		// - `filepath.Join("/", cleanPath)`: `/etc/os-release`. This is used when checking if the file is required.
   292  		required := false
   293  		for _, p := range []string{fullPath, cleanPath, filepath.Join("/", cleanPath)} {
   294  			if requirer.FileRequired(p, header.FileInfo()) {
   295  				required = true
   296  				break
   297  			}
   298  			if _, ok := currRequiredTargets[p]; ok {
   299  				required = true
   300  				break
   301  			}
   302  		}
   303  		if !required {
   304  			continue
   305  		}
   306  
   307  		switch header.Typeflag {
   308  		case tar.TypeReg:
   309  			buf := new(bytes.Buffer)
   310  			_, err = io.Copy(buf, tarReader)
   311  			if err != nil {
   312  				return nil, err
   313  			}
   314  
   315  			content := buf.Bytes()
   316  
   317  			parent := filepath.Dir(fullPath)
   318  			if err := os.MkdirAll(parent, fs.ModePerm); err != nil {
   319  				log.Errorf("failed to create directory %q for file %q: %v", parent, fullPath, err)
   320  				return nil, fmt.Errorf("failed to create directory %q for file %q: %w", parent, fullPath, err)
   321  			}
   322  
   323  			// Retain the original file permission but update it so we can always read and write the file.
   324  			modeWithOwnerReadWrite := header.FileInfo().Mode() | 0600
   325  
   326  			err = safeWriteFile(root, cleanPath, content, modeWithOwnerReadWrite)
   327  			if err != nil {
   328  				// TODO: b/412437775 - The error handling below is not ideal. It will become a mess if other
   329  				// exceptions are added. Unfortunately, the os package does not export the underlying
   330  				// error, so we have to do string matching for now.
   331  				if strings.Contains(err.Error(), "path escapes from parent") {
   332  					log.Warnf("path escapes from parent, potential path traversal attack detected: %q: %v", fullPath, err)
   333  					continue
   334  				}
   335  				if strings.Contains(err.Error(), "too many levels of symbolic links") {
   336  					log.Warnf("too many levels of symbolic links found: %q: %v", fullPath, err)
   337  					continue
   338  				}
   339  				return nil, err
   340  			}
   341  
   342  			// TODO: b/406760694 - Remove this once the bug is fixed.
   343  
   344  		case tar.TypeLink, tar.TypeSymlink:
   345  			parent := filepath.Dir(fullPath)
   346  			if err := os.MkdirAll(parent, fs.ModePerm); err != nil {
   347  				log.Errorf("failed to create directory %q: %v", parent, err)
   348  				if symlinkErrStrategy == SymlinkErrReturn {
   349  					return nil, fmt.Errorf("failed to create directory %q: %w", parent, err)
   350  				}
   351  			}
   352  
   353  			target := header.Linkname
   354  			targetPath := target
   355  
   356  			if symlink.TargetOutsideRoot(cleanPath, target) {
   357  				log.Warnf("Found symlink that points outside the root, skipping: %q -> %q", cleanPath, target)
   358  				continue
   359  			}
   360  
   361  			// Only absolute destination need to be prepended. Relative destinations still work.
   362  			if filepath.IsAbs(targetPath) {
   363  				targetPath = filepath.Join(dir, target)
   364  				currRequiredTargets[target] = true
   365  			} else {
   366  				// Track the absolute path of the target so it is not skipped in the next pass.
   367  				targetAbs := filepath.Join(filepath.Dir(cleanPath), target)
   368  				currRequiredTargets[targetAbs] = true
   369  			}
   370  
   371  			if symlinkResolution == SymlinkRetain {
   372  				// TODO: b/412444199 - Use the os.Root API to create symlinks when root.Symlink is available.
   373  				if err := os.Symlink(targetPath, fullPath); err != nil {
   374  					log.Errorf("failed to symlink %q to %q: %v", fullPath, targetPath, err)
   375  					if symlinkErrStrategy == SymlinkErrReturn {
   376  						return nil, fmt.Errorf("failed to symlink %q to %q: %w", fullPath, targetPath, err)
   377  					}
   378  					continue
   379  				}
   380  				log.Infof("created symlink %q to %q", fullPath, targetPath)
   381  				continue
   382  			}
   383  
   384  			content, err := func() ([]byte, error) {
   385  				file, err := root.OpenFile(targetPath, os.O_RDONLY, 0644)
   386  				if err != nil {
   387  					return nil, fmt.Errorf("failed to open file %q: %w", targetPath, err)
   388  				}
   389  				content, err := io.ReadAll(file)
   390  				if err != nil {
   391  					return nil, fmt.Errorf("failed to read file %q: %w", targetPath, err)
   392  				}
   393  				if err := file.Close(); err != nil {
   394  					return nil, fmt.Errorf("failed to close file %q: %w", targetPath, err)
   395  				}
   396  				return content, nil
   397  			}()
   398  			if err != nil {
   399  				// If there is an error getting the contents of the target file, but this is not the final
   400  				// pass, then we can skip. This is because another pass might resolve the target file.
   401  				if !finalPass {
   402  					continue
   403  				}
   404  				log.Errorf("failed to get contents of file %q: %v", targetPath, err)
   405  				if symlinkErrStrategy == SymlinkErrLog {
   406  					continue
   407  				}
   408  				if symlinkErrStrategy == SymlinkErrReturn {
   409  					return nil, fmt.Errorf("failed to get contents of file %q: %w", targetPath, err)
   410  				}
   411  			}
   412  
   413  			// Attempt to write the contents of the target in the symlink's path as a regular file.
   414  			if err := safeWriteFile(root, cleanPath, content, 0644); err != nil {
   415  				log.Errorf("failed to write symlink as regular file %q: %v", cleanPath, err)
   416  				if symlinkErrStrategy == SymlinkErrReturn {
   417  					return nil, fmt.Errorf("failed to write symlink as regular file %q: %w", cleanPath, err)
   418  				}
   419  			}
   420  
   421  		case tar.TypeDir:
   422  			continue
   423  		}
   424  	}
   425  
   426  	return currRequiredTargets, nil
   427  }