github.com/moby/docker@v26.1.3+incompatible/pkg/archive/copy.go (about) 1 package archive // import "github.com/docker/docker/pkg/archive" 2 3 import ( 4 "archive/tar" 5 "context" 6 "errors" 7 "io" 8 "os" 9 "path/filepath" 10 "strings" 11 12 "github.com/containerd/log" 13 "github.com/docker/docker/pkg/system" 14 ) 15 16 // Errors used or returned by this file. 17 var ( 18 ErrNotDirectory = errors.New("not a directory") 19 ErrDirNotExists = errors.New("no such directory") 20 ErrCannotCopyDir = errors.New("cannot copy directory") 21 ErrInvalidCopySource = errors.New("invalid copy source content") 22 ) 23 24 // PreserveTrailingDotOrSeparator returns the given cleaned path (after 25 // processing using any utility functions from the path or filepath stdlib 26 // packages) and appends a trailing `/.` or `/` if its corresponding original 27 // path (from before being processed by utility functions from the path or 28 // filepath stdlib packages) ends with a trailing `/.` or `/`. If the cleaned 29 // path already ends in a `.` path segment, then another is not added. If the 30 // clean path already ends in a path separator, then another is not added. 31 func PreserveTrailingDotOrSeparator(cleanedPath string, originalPath string) string { 32 // Ensure paths are in platform semantics 33 cleanedPath = normalizePath(cleanedPath) 34 originalPath = normalizePath(originalPath) 35 36 if !specifiesCurrentDir(cleanedPath) && specifiesCurrentDir(originalPath) { 37 if !hasTrailingPathSeparator(cleanedPath) { 38 // Add a separator if it doesn't already end with one (a cleaned 39 // path would only end in a separator if it is the root). 40 cleanedPath += string(filepath.Separator) 41 } 42 cleanedPath += "." 43 } 44 45 if !hasTrailingPathSeparator(cleanedPath) && hasTrailingPathSeparator(originalPath) { 46 cleanedPath += string(filepath.Separator) 47 } 48 49 return cleanedPath 50 } 51 52 // assertsDirectory returns whether the given path is 53 // asserted to be a directory, i.e., the path ends with 54 // a trailing '/' or `/.`, assuming a path separator of `/`. 55 func assertsDirectory(path string) bool { 56 return hasTrailingPathSeparator(path) || specifiesCurrentDir(path) 57 } 58 59 // hasTrailingPathSeparator returns whether the given 60 // path ends with the system's path separator character. 61 func hasTrailingPathSeparator(path string) bool { 62 return len(path) > 0 && path[len(path)-1] == filepath.Separator 63 } 64 65 // specifiesCurrentDir returns whether the given path specifies 66 // a "current directory", i.e., the last path segment is `.`. 67 func specifiesCurrentDir(path string) bool { 68 return filepath.Base(path) == "." 69 } 70 71 // SplitPathDirEntry splits the given path between its directory name and its 72 // basename by first cleaning the path but preserves a trailing "." if the 73 // original path specified the current directory. 74 func SplitPathDirEntry(path string) (dir, base string) { 75 cleanedPath := filepath.Clean(filepath.FromSlash(path)) 76 77 if specifiesCurrentDir(path) { 78 cleanedPath += string(os.PathSeparator) + "." 79 } 80 81 return filepath.Dir(cleanedPath), filepath.Base(cleanedPath) 82 } 83 84 // TarResource archives the resource described by the given CopyInfo to a Tar 85 // archive. A non-nil error is returned if sourcePath does not exist or is 86 // asserted to be a directory but exists as another type of file. 87 // 88 // This function acts as a convenient wrapper around TarWithOptions, which 89 // requires a directory as the source path. TarResource accepts either a 90 // directory or a file path and correctly sets the Tar options. 91 func TarResource(sourceInfo CopyInfo) (content io.ReadCloser, err error) { 92 return TarResourceRebase(sourceInfo.Path, sourceInfo.RebaseName) 93 } 94 95 // TarResourceRebase is like TarResource but renames the first path element of 96 // items in the resulting tar archive to match the given rebaseName if not "". 97 func TarResourceRebase(sourcePath, rebaseName string) (content io.ReadCloser, err error) { 98 sourcePath = normalizePath(sourcePath) 99 if _, err = os.Lstat(sourcePath); err != nil { 100 // Catches the case where the source does not exist or is not a 101 // directory if asserted to be a directory, as this also causes an 102 // error. 103 return 104 } 105 106 // Separate the source path between its directory and 107 // the entry in that directory which we are archiving. 108 sourceDir, sourceBase := SplitPathDirEntry(sourcePath) 109 opts := TarResourceRebaseOpts(sourceBase, rebaseName) 110 111 log.G(context.TODO()).Debugf("copying %q from %q", sourceBase, sourceDir) 112 return TarWithOptions(sourceDir, opts) 113 } 114 115 // TarResourceRebaseOpts does not preform the Tar, but instead just creates the rebase 116 // parameters to be sent to TarWithOptions (the TarOptions struct) 117 func TarResourceRebaseOpts(sourceBase string, rebaseName string) *TarOptions { 118 filter := []string{sourceBase} 119 return &TarOptions{ 120 Compression: Uncompressed, 121 IncludeFiles: filter, 122 IncludeSourceDir: true, 123 RebaseNames: map[string]string{ 124 sourceBase: rebaseName, 125 }, 126 } 127 } 128 129 // CopyInfo holds basic info about the source 130 // or destination path of a copy operation. 131 type CopyInfo struct { 132 Path string 133 Exists bool 134 IsDir bool 135 RebaseName string 136 } 137 138 // CopyInfoSourcePath stats the given path to create a CopyInfo 139 // struct representing that resource for the source of an archive copy 140 // operation. The given path should be an absolute local path. A source path 141 // has all symlinks evaluated that appear before the last path separator ("/" 142 // on Unix). As it is to be a copy source, the path must exist. 143 func CopyInfoSourcePath(path string, followLink bool) (CopyInfo, error) { 144 // normalize the file path and then evaluate the symbol link 145 // we will use the target file instead of the symbol link if 146 // followLink is set 147 path = normalizePath(path) 148 149 resolvedPath, rebaseName, err := ResolveHostSourcePath(path, followLink) 150 if err != nil { 151 return CopyInfo{}, err 152 } 153 154 stat, err := os.Lstat(resolvedPath) 155 if err != nil { 156 return CopyInfo{}, err 157 } 158 159 return CopyInfo{ 160 Path: resolvedPath, 161 Exists: true, 162 IsDir: stat.IsDir(), 163 RebaseName: rebaseName, 164 }, nil 165 } 166 167 // CopyInfoDestinationPath stats the given path to create a CopyInfo 168 // struct representing that resource for the destination of an archive copy 169 // operation. The given path should be an absolute local path. 170 func CopyInfoDestinationPath(path string) (info CopyInfo, err error) { 171 maxSymlinkIter := 10 // filepath.EvalSymlinks uses 255, but 10 already seems like a lot. 172 path = normalizePath(path) 173 originalPath := path 174 175 stat, err := os.Lstat(path) 176 177 if err == nil && stat.Mode()&os.ModeSymlink == 0 { 178 // The path exists and is not a symlink. 179 return CopyInfo{ 180 Path: path, 181 Exists: true, 182 IsDir: stat.IsDir(), 183 }, nil 184 } 185 186 // While the path is a symlink. 187 for n := 0; err == nil && stat.Mode()&os.ModeSymlink != 0; n++ { 188 if n > maxSymlinkIter { 189 // Don't follow symlinks more than this arbitrary number of times. 190 return CopyInfo{}, errors.New("too many symlinks in " + originalPath) 191 } 192 193 // The path is a symbolic link. We need to evaluate it so that the 194 // destination of the copy operation is the link target and not the 195 // link itself. This is notably different than CopyInfoSourcePath which 196 // only evaluates symlinks before the last appearing path separator. 197 // Also note that it is okay if the last path element is a broken 198 // symlink as the copy operation should create the target. 199 var linkTarget string 200 201 linkTarget, err = os.Readlink(path) 202 if err != nil { 203 return CopyInfo{}, err 204 } 205 206 if !system.IsAbs(linkTarget) { 207 // Join with the parent directory. 208 dstParent, _ := SplitPathDirEntry(path) 209 linkTarget = filepath.Join(dstParent, linkTarget) 210 } 211 212 path = linkTarget 213 stat, err = os.Lstat(path) 214 } 215 216 if err != nil { 217 // It's okay if the destination path doesn't exist. We can still 218 // continue the copy operation if the parent directory exists. 219 if !os.IsNotExist(err) { 220 return CopyInfo{}, err 221 } 222 223 // Ensure destination parent dir exists. 224 dstParent, _ := SplitPathDirEntry(path) 225 226 parentDirStat, err := os.Stat(dstParent) 227 if err != nil { 228 return CopyInfo{}, err 229 } 230 if !parentDirStat.IsDir() { 231 return CopyInfo{}, ErrNotDirectory 232 } 233 234 return CopyInfo{Path: path}, nil 235 } 236 237 // The path exists after resolving symlinks. 238 return CopyInfo{ 239 Path: path, 240 Exists: true, 241 IsDir: stat.IsDir(), 242 }, nil 243 } 244 245 // PrepareArchiveCopy prepares the given srcContent archive, which should 246 // contain the archived resource described by srcInfo, to the destination 247 // described by dstInfo. Returns the possibly modified content archive along 248 // with the path to the destination directory which it should be extracted to. 249 func PrepareArchiveCopy(srcContent io.Reader, srcInfo, dstInfo CopyInfo) (dstDir string, content io.ReadCloser, err error) { 250 // Ensure in platform semantics 251 srcInfo.Path = normalizePath(srcInfo.Path) 252 dstInfo.Path = normalizePath(dstInfo.Path) 253 254 // Separate the destination path between its directory and base 255 // components in case the source archive contents need to be rebased. 256 dstDir, dstBase := SplitPathDirEntry(dstInfo.Path) 257 _, srcBase := SplitPathDirEntry(srcInfo.Path) 258 259 switch { 260 case dstInfo.Exists && dstInfo.IsDir: 261 // The destination exists as a directory. No alteration 262 // to srcContent is needed as its contents can be 263 // simply extracted to the destination directory. 264 return dstInfo.Path, io.NopCloser(srcContent), nil 265 case dstInfo.Exists && srcInfo.IsDir: 266 // The destination exists as some type of file and the source 267 // content is a directory. This is an error condition since 268 // you cannot copy a directory to an existing file location. 269 return "", nil, ErrCannotCopyDir 270 case dstInfo.Exists: 271 // The destination exists as some type of file and the source content 272 // is also a file. The source content entry will have to be renamed to 273 // have a basename which matches the destination path's basename. 274 if len(srcInfo.RebaseName) != 0 { 275 srcBase = srcInfo.RebaseName 276 } 277 return dstDir, RebaseArchiveEntries(srcContent, srcBase, dstBase), nil 278 case srcInfo.IsDir: 279 // The destination does not exist and the source content is an archive 280 // of a directory. The archive should be extracted to the parent of 281 // the destination path instead, and when it is, the directory that is 282 // created as a result should take the name of the destination path. 283 // The source content entries will have to be renamed to have a 284 // basename which matches the destination path's basename. 285 if len(srcInfo.RebaseName) != 0 { 286 srcBase = srcInfo.RebaseName 287 } 288 return dstDir, RebaseArchiveEntries(srcContent, srcBase, dstBase), nil 289 case assertsDirectory(dstInfo.Path): 290 // The destination does not exist and is asserted to be created as a 291 // directory, but the source content is not a directory. This is an 292 // error condition since you cannot create a directory from a file 293 // source. 294 return "", nil, ErrDirNotExists 295 default: 296 // The last remaining case is when the destination does not exist, is 297 // not asserted to be a directory, and the source content is not an 298 // archive of a directory. It this case, the destination file will need 299 // to be created when the archive is extracted and the source content 300 // entry will have to be renamed to have a basename which matches the 301 // destination path's basename. 302 if len(srcInfo.RebaseName) != 0 { 303 srcBase = srcInfo.RebaseName 304 } 305 return dstDir, RebaseArchiveEntries(srcContent, srcBase, dstBase), nil 306 } 307 } 308 309 // RebaseArchiveEntries rewrites the given srcContent archive replacing 310 // an occurrence of oldBase with newBase at the beginning of entry names. 311 func RebaseArchiveEntries(srcContent io.Reader, oldBase, newBase string) io.ReadCloser { 312 if oldBase == string(os.PathSeparator) { 313 // If oldBase specifies the root directory, use an empty string as 314 // oldBase instead so that newBase doesn't replace the path separator 315 // that all paths will start with. 316 oldBase = "" 317 } 318 319 rebased, w := io.Pipe() 320 321 go func() { 322 srcTar := tar.NewReader(srcContent) 323 rebasedTar := tar.NewWriter(w) 324 325 for { 326 hdr, err := srcTar.Next() 327 if err == io.EOF { 328 // Signals end of archive. 329 rebasedTar.Close() 330 w.Close() 331 return 332 } 333 if err != nil { 334 w.CloseWithError(err) 335 return 336 } 337 338 // srcContent tar stream, as served by TarWithOptions(), is 339 // definitely in PAX format, but tar.Next() mistakenly guesses it 340 // as USTAR, which creates a problem: if the newBase is >100 341 // characters long, WriteHeader() returns an error like 342 // "archive/tar: cannot encode header: Format specifies USTAR; and USTAR cannot encode Name=...". 343 // 344 // To fix, set the format to PAX here. See docker/for-linux issue #484. 345 hdr.Format = tar.FormatPAX 346 hdr.Name = strings.Replace(hdr.Name, oldBase, newBase, 1) 347 if hdr.Typeflag == tar.TypeLink { 348 hdr.Linkname = strings.Replace(hdr.Linkname, oldBase, newBase, 1) 349 } 350 351 if err = rebasedTar.WriteHeader(hdr); err != nil { 352 w.CloseWithError(err) 353 return 354 } 355 356 // Ignoring GoSec G110. See https://github.com/securego/gosec/pull/433 357 // and https://cure53.de/pentest-report_opa.pdf, which recommends to 358 // replace io.Copy with io.CopyN7. The latter allows to specify the 359 // maximum number of bytes that should be read. By properly defining 360 // the limit, it can be assured that a GZip compression bomb cannot 361 // easily cause a Denial-of-Service. 362 // After reviewing with @tonistiigi and @cpuguy83, this should not 363 // affect us, because here we do not read into memory, hence should 364 // not be vulnerable to this code consuming memory. 365 //nolint:gosec // G110: Potential DoS vulnerability via decompression bomb (gosec) 366 if _, err = io.Copy(rebasedTar, srcTar); err != nil { 367 w.CloseWithError(err) 368 return 369 } 370 } 371 }() 372 373 return rebased 374 } 375 376 // CopyResource performs an archive copy from the given source path to the 377 // given destination path. The source path MUST exist and the destination 378 // path's parent directory must exist. 379 func CopyResource(srcPath, dstPath string, followLink bool) error { 380 var ( 381 srcInfo CopyInfo 382 err error 383 ) 384 385 // Ensure in platform semantics 386 srcPath = normalizePath(srcPath) 387 dstPath = normalizePath(dstPath) 388 389 // Clean the source and destination paths. 390 srcPath = PreserveTrailingDotOrSeparator(filepath.Clean(srcPath), srcPath) 391 dstPath = PreserveTrailingDotOrSeparator(filepath.Clean(dstPath), dstPath) 392 393 if srcInfo, err = CopyInfoSourcePath(srcPath, followLink); err != nil { 394 return err 395 } 396 397 content, err := TarResource(srcInfo) 398 if err != nil { 399 return err 400 } 401 defer content.Close() 402 403 return CopyTo(content, srcInfo, dstPath) 404 } 405 406 // CopyTo handles extracting the given content whose 407 // entries should be sourced from srcInfo to dstPath. 408 func CopyTo(content io.Reader, srcInfo CopyInfo, dstPath string) error { 409 // The destination path need not exist, but CopyInfoDestinationPath will 410 // ensure that at least the parent directory exists. 411 dstInfo, err := CopyInfoDestinationPath(normalizePath(dstPath)) 412 if err != nil { 413 return err 414 } 415 416 dstDir, copyArchive, err := PrepareArchiveCopy(content, srcInfo, dstInfo) 417 if err != nil { 418 return err 419 } 420 defer copyArchive.Close() 421 422 options := &TarOptions{ 423 NoLchown: true, 424 NoOverwriteDirNonDir: true, 425 } 426 427 return Untar(copyArchive, dstDir, options) 428 } 429 430 // ResolveHostSourcePath decides real path need to be copied with parameters such as 431 // whether to follow symbol link or not, if followLink is true, resolvedPath will return 432 // link target of any symbol link file, else it will only resolve symlink of directory 433 // but return symbol link file itself without resolving. 434 func ResolveHostSourcePath(path string, followLink bool) (resolvedPath, rebaseName string, err error) { 435 if followLink { 436 resolvedPath, err = filepath.EvalSymlinks(path) 437 if err != nil { 438 return 439 } 440 441 resolvedPath, rebaseName = GetRebaseName(path, resolvedPath) 442 } else { 443 dirPath, basePath := filepath.Split(path) 444 445 // if not follow symbol link, then resolve symbol link of parent dir 446 var resolvedDirPath string 447 resolvedDirPath, err = filepath.EvalSymlinks(dirPath) 448 if err != nil { 449 return 450 } 451 // resolvedDirPath will have been cleaned (no trailing path separators) so 452 // we can manually join it with the base path element. 453 resolvedPath = resolvedDirPath + string(filepath.Separator) + basePath 454 if hasTrailingPathSeparator(path) && 455 filepath.Base(path) != filepath.Base(resolvedPath) { 456 rebaseName = filepath.Base(path) 457 } 458 } 459 return resolvedPath, rebaseName, nil 460 } 461 462 // GetRebaseName normalizes and compares path and resolvedPath, 463 // return completed resolved path and rebased file name 464 func GetRebaseName(path, resolvedPath string) (string, string) { 465 // linkTarget will have been cleaned (no trailing path separators and dot) so 466 // we can manually join it with them 467 var rebaseName string 468 if specifiesCurrentDir(path) && 469 !specifiesCurrentDir(resolvedPath) { 470 resolvedPath += string(filepath.Separator) + "." 471 } 472 473 if hasTrailingPathSeparator(path) && 474 !hasTrailingPathSeparator(resolvedPath) { 475 resolvedPath += string(filepath.Separator) 476 } 477 478 if filepath.Base(path) != filepath.Base(resolvedPath) { 479 // In the case where the path had a trailing separator and a symlink 480 // evaluation has changed the last path component, we will need to 481 // rebase the name in the archive that is being copied to match the 482 // originally requested name. 483 rebaseName = filepath.Base(path) 484 } 485 return resolvedPath, rebaseName 486 }