github.com/google/osv-scalibr@v0.4.1/artifact/image/unpack/unpack.go (about) 1 // Copyright 2025 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package unpack contains functions to unpack an image. 16 package unpack 17 18 import ( 19 "bytes" 20 "errors" 21 "fmt" 22 "io" 23 "io/fs" 24 "os" 25 "path" 26 "path/filepath" 27 "strings" 28 29 "archive/tar" 30 31 v1 "github.com/google/go-containerregistry/pkg/v1" 32 "github.com/google/osv-scalibr/artifact/image/require" 33 "github.com/google/osv-scalibr/artifact/image/symlink" 34 scalibrtar "github.com/google/osv-scalibr/artifact/image/tar" 35 "github.com/google/osv-scalibr/log" 36 ) 37 38 const ( 39 // SymlinkRetain specifies that the symlink should be retained as a symlink. 40 SymlinkRetain SymlinkResolution = "symlink_retain" 41 // SymlinkIgnore specifies that the symlink should be ignored. 42 SymlinkIgnore SymlinkResolution = "symlink_ignore" 43 44 // SymlinkErrLog specifies that errors resolving symlinks are logged but not returned. Image unpacking continues. 45 SymlinkErrLog SymlinkErrStrategy = "symlink_err_log" 46 // SymlinkErrReturn specifies that errors resolving symlinks are returned, which stops unpacking the image. 47 SymlinkErrReturn SymlinkErrStrategy = "symlink_err_return" 48 49 // DefaultMaxPass is the default maximum number of times the image is unpacked to resolve symlinks. 50 DefaultMaxPass = 3 51 // DefaultMaxFileBytes is the default maximum size of files that will be unpacked. Larger files are ignored. 52 // The max is large because some files, like kube-apiserver, are ~115MB. 53 DefaultMaxFileBytes = 1024 * 1024 * 1024 // 1GB 54 ) 55 56 // SymlinkResolution specifies how to resolve symlinks. 57 type SymlinkResolution string 58 59 // SymlinkErrStrategy how to handle errors resolving symlinks. 60 type SymlinkErrStrategy string 61 62 // Unpacker unpacks the image. 63 type Unpacker struct { 64 SymlinkResolution SymlinkResolution 65 SymlinkErrStrategy SymlinkErrStrategy 66 MaxPass int 67 MaxSizeBytes int64 68 Requirer require.FileRequirer 69 } 70 71 // UnpackerConfig configures how to unpack the image. 72 type UnpackerConfig struct { 73 // SymlinkResolution specifies how to resolve symlinks. 74 SymlinkResolution SymlinkResolution 75 // SymlinkErrStrategy specifies how to handle symlink errors. 76 SymlinkErrStrategy SymlinkErrStrategy 77 // MaxPass limits the times the image is unpacked to resolve symlinks. 0 or less is essentially "unset" and will default to 2. 78 MaxPass int 79 // MaxFileBytes is the maximum size of files that will be unpacked. Larger files are ignored. 80 MaxFileBytes int64 81 // Requirer's FileRequired function is run on each file during unpacking. The file is unpacked if true and ignored if false. 82 Requirer require.FileRequirer 83 } 84 85 // DefaultUnpackerConfig returns default configurations for a new Unpacker. 86 func DefaultUnpackerConfig() *UnpackerConfig { 87 return &UnpackerConfig{ 88 SymlinkResolution: SymlinkRetain, 89 SymlinkErrStrategy: SymlinkErrLog, 90 MaxPass: DefaultMaxPass, 91 MaxFileBytes: DefaultMaxFileBytes, 92 Requirer: &require.FileRequirerAll{}, 93 } 94 } 95 96 // WithMaxPass returns a UnpackerConfig with the specified MaxPass param. 97 func (cfg *UnpackerConfig) WithMaxPass(maxPass int) *UnpackerConfig { 98 cfg.MaxPass = maxPass 99 return cfg 100 } 101 102 // WithMaxFileBytes returns a UnpackerConfig with the specified MaxFileBytes param. 103 func (cfg *UnpackerConfig) WithMaxFileBytes(maxFileBytes int64) *UnpackerConfig { 104 cfg.MaxFileBytes = maxFileBytes 105 return cfg 106 } 107 108 // WithSymlinkResolution returns a UnpackerConfig with the specified SymlinkResolution param. 109 func (cfg *UnpackerConfig) WithSymlinkResolution(resolution SymlinkResolution) *UnpackerConfig { 110 cfg.SymlinkResolution = resolution 111 return cfg 112 } 113 114 // WithRequirer returns a UnpackerConfig with the specified FileRequirer param. 115 func (cfg *UnpackerConfig) WithRequirer(requirer require.FileRequirer) *UnpackerConfig { 116 cfg.Requirer = requirer 117 return cfg 118 } 119 120 // NewUnpacker creates a new Unpacker. 121 func NewUnpacker(cfg *UnpackerConfig) (*Unpacker, error) { 122 if cfg.SymlinkResolution == "" { 123 return nil, errors.New("cfg.SymlinkResolution was not specified") 124 } 125 if cfg.SymlinkErrStrategy == "" { 126 return nil, errors.New("cfg.SymlinkErrStrategy was not specified") 127 } 128 129 maxPass := DefaultMaxPass 130 if cfg.MaxPass > 0 { 131 maxPass = cfg.MaxPass 132 } 133 maxFileBytes := cfg.MaxFileBytes 134 if cfg.MaxFileBytes <= 0 { 135 maxFileBytes = 1024 * 1024 * 1024 * 1024 // 1TB 136 } 137 138 if cfg.Requirer == nil { 139 return nil, errors.New("cfg.Requirer cannot be nil") 140 } 141 142 return &Unpacker{ 143 SymlinkResolution: cfg.SymlinkResolution, 144 SymlinkErrStrategy: cfg.SymlinkErrStrategy, 145 MaxPass: maxPass, 146 MaxSizeBytes: maxFileBytes, 147 Requirer: cfg.Requirer, 148 }, nil 149 } 150 151 // UnpackSquashed squashes the layers of image then copies its contents to dir. 152 func (u *Unpacker) UnpackSquashed(dir string, image v1.Image) error { 153 if u.SymlinkResolution == SymlinkIgnore { 154 return fmt.Errorf("symlink resolution strategy %q is not supported", u.SymlinkResolution) 155 } 156 157 if dir == "" { 158 return fmt.Errorf("dir cannot be root %q", dir) 159 } 160 if image == nil { 161 return errors.New("image cannot be nil") 162 } 163 164 tarDir, err := os.MkdirTemp("", "image-tar-tmp-*") 165 if err != nil { 166 return fmt.Errorf("failed to create temporary directory for image tar: %w", err) 167 } 168 defer func() { 169 if err := os.RemoveAll(tarDir); err != nil { 170 log.Errorf("failed to remove temporary directory for image tar %q: %v", tarDir, err) 171 } 172 }() 173 tarPath := filepath.Join(tarDir, "image.tar") 174 defer func() { 175 if err := os.Remove(tarPath); err != nil { 176 log.Errorf("failed to remove temporary tar file %q: %v", tarPath, err) 177 } 178 }() 179 if err := scalibrtar.SaveToTarball(tarPath, image); err != nil { 180 if strings.Contains(err.Error(), "invalid tar header") { 181 return fmt.Errorf("invalid tar header when saving image to tarball (error message %q) with %q", tarPath, err.Error()) 182 } 183 return fmt.Errorf("failed to save image to tarball %q: %w", tarPath, err) 184 } 185 186 return u.UnpackSquashedFromTarball(dir, tarPath) 187 } 188 189 // UnpackSquashedFromTarball squashes the layers of an image from a tarball then 190 // copies its contents to dir. 191 func (u *Unpacker) UnpackSquashedFromTarball(dir string, tarPath string) error { 192 // requiredTargets stores targets that symlinks point to. 193 // This is needed because the symlink may be required by u.requirer, but the target may not be. 194 requiredTargets := make(map[string]bool) 195 for pass := range u.MaxPass { 196 finalPass := false 197 // Resolve symlinks on the last pass once all potential target files have been unpacked. 198 if pass == u.MaxPass-1 { 199 finalPass = true 200 } 201 reader, err := os.Open(tarPath) 202 if err != nil { 203 log.Errorf("Failed to open tarball of image at %q: %v", tarPath, err) 204 return fmt.Errorf("failed to open tarball of image at %q: %w", tarPath, err) 205 } 206 log.Infof("Unpacking pass %d of %d", pass+1, u.MaxPass) 207 requiredTargets, err = unpack(dir, reader, u.SymlinkResolution, u.SymlinkErrStrategy, u.Requirer, requiredTargets, finalPass, u.MaxSizeBytes) 208 _ = reader.Close() 209 if err != nil { 210 return err 211 } 212 } 213 214 // Remove symlinks that have a nonexistent destination file or nonexistent destination directory. 215 if err := symlink.RemoveObsoleteSymlinks(dir); err != nil { 216 return fmt.Errorf("failed to remove obsolete symlinks from dir %q: %w", dir, err) 217 } 218 219 return nil 220 } 221 222 // safeWriteFile is a helper function that uses os.Root to write to a file with the specified 223 // permissions. 224 func safeWriteFile(root *os.Root, path string, content []byte, perm os.FileMode) error { 225 // os.Root.OpenFile only supports the 9 least significant bits (0o777), 226 // so ensure we strip any other bits (like setuid, sticky bit, etc.) 227 normalizedPerm := perm & 0o777 228 229 file, err := root.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, normalizedPerm) 230 if err != nil { 231 log.Errorf("failed to open file %q: %v", path, err) 232 return fmt.Errorf("failed to open file %q: %w", path, err) 233 } 234 235 _, err = file.Write(content) 236 if err != nil { 237 log.Errorf("failed to write file %q: %v", path, err) 238 return fmt.Errorf("failed to write file %q: %w", path, err) 239 } 240 241 if err := file.Close(); err != nil { 242 log.Errorf("failed to close file %q: %v", path, err) 243 return fmt.Errorf("failed to close file %q: %w", path, err) 244 } 245 return nil 246 } 247 248 func unpack(dir string, reader io.Reader, symlinkResolution SymlinkResolution, symlinkErrStrategy SymlinkErrStrategy, requirer require.FileRequirer, requiredTargets map[string]bool, finalPass bool, maxSizeBytes int64) (map[string]bool, error) { 249 tarReader := tar.NewReader(reader) 250 251 root, err := os.OpenRoot(dir) 252 if err != nil { 253 return nil, fmt.Errorf("failed to open root directory: %w", err) 254 } 255 defer root.Close() 256 257 // Defensive copy of requiredTargets to avoid modifying the original. 258 currRequiredTargets := make(map[string]bool) 259 for t := range requiredTargets { 260 currRequiredTargets[t] = true 261 } 262 263 for { 264 header, err := tarReader.Next() 265 if err != nil { 266 if errors.Is(err, io.EOF) { 267 break 268 } 269 return nil, fmt.Errorf("failed to read next header in tarball: %w", err) 270 } 271 272 if header.Size > maxSizeBytes { 273 log.Infof("skipping file %q because its size (%d bytes) is larger than the max size (%d bytes)", header.Name, header.Size, maxSizeBytes) 274 continue 275 } 276 277 cleanPath := path.Clean(header.Name) 278 fullPath := path.Join(dir, cleanPath) 279 280 // Skip files already unpacked. 281 // Lstat is used instead of Stat to avoid following symlinks, because their targets may not exist yet. 282 if _, err = root.Lstat(fullPath); err == nil { 283 continue 284 } 285 286 // Skip files that are not required by extractors and are not targets of required symlinks. 287 // Try multiple paths variations 288 // (with parent dir, without leading slash, with leading slash). For example: 289 // - `fullPath`: `tmp/12345/etc/os-release`. This is used when actually writing the file to disk. 290 // - `cleanPath`: `etc/os-release`. This is used when checking if the file is required. 291 // - `filepath.Join("/", cleanPath)`: `/etc/os-release`. This is used when checking if the file is required. 292 required := false 293 for _, p := range []string{fullPath, cleanPath, filepath.Join("/", cleanPath)} { 294 if requirer.FileRequired(p, header.FileInfo()) { 295 required = true 296 break 297 } 298 if _, ok := currRequiredTargets[p]; ok { 299 required = true 300 break 301 } 302 } 303 if !required { 304 continue 305 } 306 307 switch header.Typeflag { 308 case tar.TypeReg: 309 buf := new(bytes.Buffer) 310 _, err = io.Copy(buf, tarReader) 311 if err != nil { 312 return nil, err 313 } 314 315 content := buf.Bytes() 316 317 parent := filepath.Dir(fullPath) 318 if err := os.MkdirAll(parent, fs.ModePerm); err != nil { 319 log.Errorf("failed to create directory %q for file %q: %v", parent, fullPath, err) 320 return nil, fmt.Errorf("failed to create directory %q for file %q: %w", parent, fullPath, err) 321 } 322 323 // Retain the original file permission but update it so we can always read and write the file. 324 modeWithOwnerReadWrite := header.FileInfo().Mode() | 0600 325 326 err = safeWriteFile(root, cleanPath, content, modeWithOwnerReadWrite) 327 if err != nil { 328 // TODO: b/412437775 - The error handling below is not ideal. It will become a mess if other 329 // exceptions are added. Unfortunately, the os package does not export the underlying 330 // error, so we have to do string matching for now. 331 if strings.Contains(err.Error(), "path escapes from parent") { 332 log.Warnf("path escapes from parent, potential path traversal attack detected: %q: %v", fullPath, err) 333 continue 334 } 335 if strings.Contains(err.Error(), "too many levels of symbolic links") { 336 log.Warnf("too many levels of symbolic links found: %q: %v", fullPath, err) 337 continue 338 } 339 return nil, err 340 } 341 342 // TODO: b/406760694 - Remove this once the bug is fixed. 343 344 case tar.TypeLink, tar.TypeSymlink: 345 parent := filepath.Dir(fullPath) 346 if err := os.MkdirAll(parent, fs.ModePerm); err != nil { 347 log.Errorf("failed to create directory %q: %v", parent, err) 348 if symlinkErrStrategy == SymlinkErrReturn { 349 return nil, fmt.Errorf("failed to create directory %q: %w", parent, err) 350 } 351 } 352 353 target := header.Linkname 354 targetPath := target 355 356 if symlink.TargetOutsideRoot(cleanPath, target) { 357 log.Warnf("Found symlink that points outside the root, skipping: %q -> %q", cleanPath, target) 358 continue 359 } 360 361 // Only absolute destination need to be prepended. Relative destinations still work. 362 if filepath.IsAbs(targetPath) { 363 targetPath = filepath.Join(dir, target) 364 currRequiredTargets[target] = true 365 } else { 366 // Track the absolute path of the target so it is not skipped in the next pass. 367 targetAbs := filepath.Join(filepath.Dir(cleanPath), target) 368 currRequiredTargets[targetAbs] = true 369 } 370 371 if symlinkResolution == SymlinkRetain { 372 // TODO: b/412444199 - Use the os.Root API to create symlinks when root.Symlink is available. 373 if err := os.Symlink(targetPath, fullPath); err != nil { 374 log.Errorf("failed to symlink %q to %q: %v", fullPath, targetPath, err) 375 if symlinkErrStrategy == SymlinkErrReturn { 376 return nil, fmt.Errorf("failed to symlink %q to %q: %w", fullPath, targetPath, err) 377 } 378 continue 379 } 380 log.Infof("created symlink %q to %q", fullPath, targetPath) 381 continue 382 } 383 384 content, err := func() ([]byte, error) { 385 file, err := root.OpenFile(targetPath, os.O_RDONLY, 0644) 386 if err != nil { 387 return nil, fmt.Errorf("failed to open file %q: %w", targetPath, err) 388 } 389 content, err := io.ReadAll(file) 390 if err != nil { 391 return nil, fmt.Errorf("failed to read file %q: %w", targetPath, err) 392 } 393 if err := file.Close(); err != nil { 394 return nil, fmt.Errorf("failed to close file %q: %w", targetPath, err) 395 } 396 return content, nil 397 }() 398 if err != nil { 399 // If there is an error getting the contents of the target file, but this is not the final 400 // pass, then we can skip. This is because another pass might resolve the target file. 401 if !finalPass { 402 continue 403 } 404 log.Errorf("failed to get contents of file %q: %v", targetPath, err) 405 if symlinkErrStrategy == SymlinkErrLog { 406 continue 407 } 408 if symlinkErrStrategy == SymlinkErrReturn { 409 return nil, fmt.Errorf("failed to get contents of file %q: %w", targetPath, err) 410 } 411 } 412 413 // Attempt to write the contents of the target in the symlink's path as a regular file. 414 if err := safeWriteFile(root, cleanPath, content, 0644); err != nil { 415 log.Errorf("failed to write symlink as regular file %q: %v", cleanPath, err) 416 if symlinkErrStrategy == SymlinkErrReturn { 417 return nil, fmt.Errorf("failed to write symlink as regular file %q: %w", cleanPath, err) 418 } 419 } 420 421 case tar.TypeDir: 422 continue 423 } 424 } 425 426 return currRequiredTargets, nil 427 }