github.com/Heebron/moby@v0.0.0-20221111184709-6eab4f55faf7/pkg/archive/copy.go (about) 1 package archive // import "github.com/docker/docker/pkg/archive" 2 3 import ( 4 "archive/tar" 5 "errors" 6 "io" 7 "os" 8 "path/filepath" 9 "strings" 10 11 "github.com/docker/docker/pkg/system" 12 "github.com/sirupsen/logrus" 13 ) 14 15 // Errors used or returned by this file. 16 var ( 17 ErrNotDirectory = errors.New("not a directory") 18 ErrDirNotExists = errors.New("no such directory") 19 ErrCannotCopyDir = errors.New("cannot copy directory") 20 ErrInvalidCopySource = errors.New("invalid copy source content") 21 ) 22 23 // PreserveTrailingDotOrSeparator returns the given cleaned path (after 24 // processing using any utility functions from the path or filepath stdlib 25 // packages) and appends a trailing `/.` or `/` if its corresponding original 26 // path (from before being processed by utility functions from the path or 27 // filepath stdlib packages) ends with a trailing `/.` or `/`. If the cleaned 28 // path already ends in a `.` path segment, then another is not added. If the 29 // clean path already ends in a path separator, then another is not added. 30 func PreserveTrailingDotOrSeparator(cleanedPath string, originalPath string) string { 31 // Ensure paths are in platform semantics 32 cleanedPath = normalizePath(cleanedPath) 33 originalPath = normalizePath(originalPath) 34 35 if !specifiesCurrentDir(cleanedPath) && specifiesCurrentDir(originalPath) { 36 if !hasTrailingPathSeparator(cleanedPath) { 37 // Add a separator if it doesn't already end with one (a cleaned 38 // path would only end in a separator if it is the root). 39 cleanedPath += string(filepath.Separator) 40 } 41 cleanedPath += "." 42 } 43 44 if !hasTrailingPathSeparator(cleanedPath) && hasTrailingPathSeparator(originalPath) { 45 cleanedPath += string(filepath.Separator) 46 } 47 48 return cleanedPath 49 } 50 51 // assertsDirectory returns whether the given path is 52 // asserted to be a directory, i.e., the path ends with 53 // a trailing '/' or `/.`, assuming a path separator of `/`. 54 func assertsDirectory(path string) bool { 55 return hasTrailingPathSeparator(path) || specifiesCurrentDir(path) 56 } 57 58 // hasTrailingPathSeparator returns whether the given 59 // path ends with the system's path separator character. 60 func hasTrailingPathSeparator(path string) bool { 61 return len(path) > 0 && path[len(path)-1] == filepath.Separator 62 } 63 64 // specifiesCurrentDir returns whether the given path specifies 65 // a "current directory", i.e., the last path segment is `.`. 66 func specifiesCurrentDir(path string) bool { 67 return filepath.Base(path) == "." 68 } 69 70 // SplitPathDirEntry splits the given path between its directory name and its 71 // basename by first cleaning the path but preserves a trailing "." if the 72 // original path specified the current directory. 73 func SplitPathDirEntry(path string) (dir, base string) { 74 cleanedPath := filepath.Clean(filepath.FromSlash(path)) 75 76 if specifiesCurrentDir(path) { 77 cleanedPath += string(os.PathSeparator) + "." 78 } 79 80 return filepath.Dir(cleanedPath), filepath.Base(cleanedPath) 81 } 82 83 // TarResource archives the resource described by the given CopyInfo to a Tar 84 // archive. A non-nil error is returned if sourcePath does not exist or is 85 // asserted to be a directory but exists as another type of file. 86 // 87 // This function acts as a convenient wrapper around TarWithOptions, which 88 // requires a directory as the source path. TarResource accepts either a 89 // directory or a file path and correctly sets the Tar options. 90 func TarResource(sourceInfo CopyInfo) (content io.ReadCloser, err error) { 91 return TarResourceRebase(sourceInfo.Path, sourceInfo.RebaseName) 92 } 93 94 // TarResourceRebase is like TarResource but renames the first path element of 95 // items in the resulting tar archive to match the given rebaseName if not "". 96 func TarResourceRebase(sourcePath, rebaseName string) (content io.ReadCloser, err error) { 97 sourcePath = normalizePath(sourcePath) 98 if _, err = os.Lstat(sourcePath); err != nil { 99 // Catches the case where the source does not exist or is not a 100 // directory if asserted to be a directory, as this also causes an 101 // error. 102 return 103 } 104 105 // Separate the source path between its directory and 106 // the entry in that directory which we are archiving. 107 sourceDir, sourceBase := SplitPathDirEntry(sourcePath) 108 opts := TarResourceRebaseOpts(sourceBase, rebaseName) 109 110 logrus.Debugf("copying %q from %q", sourceBase, sourceDir) 111 return TarWithOptions(sourceDir, opts) 112 } 113 114 // TarResourceRebaseOpts does not preform the Tar, but instead just creates the rebase 115 // parameters to be sent to TarWithOptions (the TarOptions struct) 116 func TarResourceRebaseOpts(sourceBase string, rebaseName string) *TarOptions { 117 filter := []string{sourceBase} 118 return &TarOptions{ 119 Compression: Uncompressed, 120 IncludeFiles: filter, 121 IncludeSourceDir: true, 122 RebaseNames: map[string]string{ 123 sourceBase: rebaseName, 124 }, 125 } 126 } 127 128 // CopyInfo holds basic info about the source 129 // or destination path of a copy operation. 130 type CopyInfo struct { 131 Path string 132 Exists bool 133 IsDir bool 134 RebaseName string 135 } 136 137 // CopyInfoSourcePath stats the given path to create a CopyInfo 138 // struct representing that resource for the source of an archive copy 139 // operation. The given path should be an absolute local path. A source path 140 // has all symlinks evaluated that appear before the last path separator ("/" 141 // on Unix). As it is to be a copy source, the path must exist. 142 func CopyInfoSourcePath(path string, followLink bool) (CopyInfo, error) { 143 // normalize the file path and then evaluate the symbol link 144 // we will use the target file instead of the symbol link if 145 // followLink is set 146 path = normalizePath(path) 147 148 resolvedPath, rebaseName, err := ResolveHostSourcePath(path, followLink) 149 if err != nil { 150 return CopyInfo{}, err 151 } 152 153 stat, err := os.Lstat(resolvedPath) 154 if err != nil { 155 return CopyInfo{}, err 156 } 157 158 return CopyInfo{ 159 Path: resolvedPath, 160 Exists: true, 161 IsDir: stat.IsDir(), 162 RebaseName: rebaseName, 163 }, nil 164 } 165 166 // CopyInfoDestinationPath stats the given path to create a CopyInfo 167 // struct representing that resource for the destination of an archive copy 168 // operation. The given path should be an absolute local path. 169 func CopyInfoDestinationPath(path string) (info CopyInfo, err error) { 170 maxSymlinkIter := 10 // filepath.EvalSymlinks uses 255, but 10 already seems like a lot. 171 path = normalizePath(path) 172 originalPath := path 173 174 stat, err := os.Lstat(path) 175 176 if err == nil && stat.Mode()&os.ModeSymlink == 0 { 177 // The path exists and is not a symlink. 178 return CopyInfo{ 179 Path: path, 180 Exists: true, 181 IsDir: stat.IsDir(), 182 }, nil 183 } 184 185 // While the path is a symlink. 186 for n := 0; err == nil && stat.Mode()&os.ModeSymlink != 0; n++ { 187 if n > maxSymlinkIter { 188 // Don't follow symlinks more than this arbitrary number of times. 189 return CopyInfo{}, errors.New("too many symlinks in " + originalPath) 190 } 191 192 // The path is a symbolic link. We need to evaluate it so that the 193 // destination of the copy operation is the link target and not the 194 // link itself. This is notably different than CopyInfoSourcePath which 195 // only evaluates symlinks before the last appearing path separator. 196 // Also note that it is okay if the last path element is a broken 197 // symlink as the copy operation should create the target. 198 var linkTarget string 199 200 linkTarget, err = os.Readlink(path) 201 if err != nil { 202 return CopyInfo{}, err 203 } 204 205 if !system.IsAbs(linkTarget) { 206 // Join with the parent directory. 207 dstParent, _ := SplitPathDirEntry(path) 208 linkTarget = filepath.Join(dstParent, linkTarget) 209 } 210 211 path = linkTarget 212 stat, err = os.Lstat(path) 213 } 214 215 if err != nil { 216 // It's okay if the destination path doesn't exist. We can still 217 // continue the copy operation if the parent directory exists. 218 if !os.IsNotExist(err) { 219 return CopyInfo{}, err 220 } 221 222 // Ensure destination parent dir exists. 223 dstParent, _ := SplitPathDirEntry(path) 224 225 parentDirStat, err := os.Stat(dstParent) 226 if err != nil { 227 return CopyInfo{}, err 228 } 229 if !parentDirStat.IsDir() { 230 return CopyInfo{}, ErrNotDirectory 231 } 232 233 return CopyInfo{Path: path}, nil 234 } 235 236 // The path exists after resolving symlinks. 237 return CopyInfo{ 238 Path: path, 239 Exists: true, 240 IsDir: stat.IsDir(), 241 }, nil 242 } 243 244 // PrepareArchiveCopy prepares the given srcContent archive, which should 245 // contain the archived resource described by srcInfo, to the destination 246 // described by dstInfo. Returns the possibly modified content archive along 247 // with the path to the destination directory which it should be extracted to. 248 func PrepareArchiveCopy(srcContent io.Reader, srcInfo, dstInfo CopyInfo) (dstDir string, content io.ReadCloser, err error) { 249 // Ensure in platform semantics 250 srcInfo.Path = normalizePath(srcInfo.Path) 251 dstInfo.Path = normalizePath(dstInfo.Path) 252 253 // Separate the destination path between its directory and base 254 // components in case the source archive contents need to be rebased. 255 dstDir, dstBase := SplitPathDirEntry(dstInfo.Path) 256 _, srcBase := SplitPathDirEntry(srcInfo.Path) 257 258 switch { 259 case dstInfo.Exists && dstInfo.IsDir: 260 // The destination exists as a directory. No alteration 261 // to srcContent is needed as its contents can be 262 // simply extracted to the destination directory. 263 return dstInfo.Path, io.NopCloser(srcContent), nil 264 case dstInfo.Exists && srcInfo.IsDir: 265 // The destination exists as some type of file and the source 266 // content is a directory. This is an error condition since 267 // you cannot copy a directory to an existing file location. 268 return "", nil, ErrCannotCopyDir 269 case dstInfo.Exists: 270 // The destination exists as some type of file and the source content 271 // is also a file. The source content entry will have to be renamed to 272 // have a basename which matches the destination path's basename. 273 if len(srcInfo.RebaseName) != 0 { 274 srcBase = srcInfo.RebaseName 275 } 276 return dstDir, RebaseArchiveEntries(srcContent, srcBase, dstBase), nil 277 case srcInfo.IsDir: 278 // The destination does not exist and the source content is an archive 279 // of a directory. The archive should be extracted to the parent of 280 // the destination path instead, and when it is, the directory that is 281 // created as a result should take the name of the destination path. 282 // The source content entries will have to be renamed to have a 283 // basename which matches the destination path's basename. 284 if len(srcInfo.RebaseName) != 0 { 285 srcBase = srcInfo.RebaseName 286 } 287 return dstDir, RebaseArchiveEntries(srcContent, srcBase, dstBase), nil 288 case assertsDirectory(dstInfo.Path): 289 // The destination does not exist and is asserted to be created as a 290 // directory, but the source content is not a directory. This is an 291 // error condition since you cannot create a directory from a file 292 // source. 293 return "", nil, ErrDirNotExists 294 default: 295 // The last remaining case is when the destination does not exist, is 296 // not asserted to be a directory, and the source content is not an 297 // archive of a directory. It this case, the destination file will need 298 // to be created when the archive is extracted and the source content 299 // entry will have to be renamed to have a basename which matches the 300 // destination path's basename. 301 if len(srcInfo.RebaseName) != 0 { 302 srcBase = srcInfo.RebaseName 303 } 304 return dstDir, RebaseArchiveEntries(srcContent, srcBase, dstBase), nil 305 } 306 } 307 308 // RebaseArchiveEntries rewrites the given srcContent archive replacing 309 // an occurrence of oldBase with newBase at the beginning of entry names. 310 func RebaseArchiveEntries(srcContent io.Reader, oldBase, newBase string) io.ReadCloser { 311 if oldBase == string(os.PathSeparator) { 312 // If oldBase specifies the root directory, use an empty string as 313 // oldBase instead so that newBase doesn't replace the path separator 314 // that all paths will start with. 315 oldBase = "" 316 } 317 318 rebased, w := io.Pipe() 319 320 go func() { 321 srcTar := tar.NewReader(srcContent) 322 rebasedTar := tar.NewWriter(w) 323 324 for { 325 hdr, err := srcTar.Next() 326 if err == io.EOF { 327 // Signals end of archive. 328 rebasedTar.Close() 329 w.Close() 330 return 331 } 332 if err != nil { 333 w.CloseWithError(err) 334 return 335 } 336 337 // srcContent tar stream, as served by TarWithOptions(), is 338 // definitely in PAX format, but tar.Next() mistakenly guesses it 339 // as USTAR, which creates a problem: if the newBase is >100 340 // characters long, WriteHeader() returns an error like 341 // "archive/tar: cannot encode header: Format specifies USTAR; and USTAR cannot encode Name=...". 342 // 343 // To fix, set the format to PAX here. See docker/for-linux issue #484. 344 hdr.Format = tar.FormatPAX 345 hdr.Name = strings.Replace(hdr.Name, oldBase, newBase, 1) 346 if hdr.Typeflag == tar.TypeLink { 347 hdr.Linkname = strings.Replace(hdr.Linkname, oldBase, newBase, 1) 348 } 349 350 if err = rebasedTar.WriteHeader(hdr); err != nil { 351 w.CloseWithError(err) 352 return 353 } 354 355 // Ignoring GoSec G110. See https://github.com/securego/gosec/pull/433 356 // and https://cure53.de/pentest-report_opa.pdf, which recommends to 357 // replace io.Copy with io.CopyN7. The latter allows to specify the 358 // maximum number of bytes that should be read. By properly defining 359 // the limit, it can be assured that a GZip compression bomb cannot 360 // easily cause a Denial-of-Service. 361 // After reviewing with @tonistiigi and @cpuguy83, this should not 362 // affect us, because here we do not read into memory, hence should 363 // not be vulnerable to this code consuming memory. 364 //nolint:gosec // G110: Potential DoS vulnerability via decompression bomb (gosec) 365 if _, err = io.Copy(rebasedTar, srcTar); err != nil { 366 w.CloseWithError(err) 367 return 368 } 369 } 370 }() 371 372 return rebased 373 } 374 375 // CopyResource performs an archive copy from the given source path to the 376 // given destination path. The source path MUST exist and the destination 377 // path's parent directory must exist. 378 func CopyResource(srcPath, dstPath string, followLink bool) error { 379 var ( 380 srcInfo CopyInfo 381 err error 382 ) 383 384 // Ensure in platform semantics 385 srcPath = normalizePath(srcPath) 386 dstPath = normalizePath(dstPath) 387 388 // Clean the source and destination paths. 389 srcPath = PreserveTrailingDotOrSeparator(filepath.Clean(srcPath), srcPath) 390 dstPath = PreserveTrailingDotOrSeparator(filepath.Clean(dstPath), dstPath) 391 392 if srcInfo, err = CopyInfoSourcePath(srcPath, followLink); err != nil { 393 return err 394 } 395 396 content, err := TarResource(srcInfo) 397 if err != nil { 398 return err 399 } 400 defer content.Close() 401 402 return CopyTo(content, srcInfo, dstPath) 403 } 404 405 // CopyTo handles extracting the given content whose 406 // entries should be sourced from srcInfo to dstPath. 407 func CopyTo(content io.Reader, srcInfo CopyInfo, dstPath string) error { 408 // The destination path need not exist, but CopyInfoDestinationPath will 409 // ensure that at least the parent directory exists. 410 dstInfo, err := CopyInfoDestinationPath(normalizePath(dstPath)) 411 if err != nil { 412 return err 413 } 414 415 dstDir, copyArchive, err := PrepareArchiveCopy(content, srcInfo, dstInfo) 416 if err != nil { 417 return err 418 } 419 defer copyArchive.Close() 420 421 options := &TarOptions{ 422 NoLchown: true, 423 NoOverwriteDirNonDir: true, 424 } 425 426 return Untar(copyArchive, dstDir, options) 427 } 428 429 // ResolveHostSourcePath decides real path need to be copied with parameters such as 430 // whether to follow symbol link or not, if followLink is true, resolvedPath will return 431 // link target of any symbol link file, else it will only resolve symlink of directory 432 // but return symbol link file itself without resolving. 433 func ResolveHostSourcePath(path string, followLink bool) (resolvedPath, rebaseName string, err error) { 434 if followLink { 435 resolvedPath, err = filepath.EvalSymlinks(path) 436 if err != nil { 437 return 438 } 439 440 resolvedPath, rebaseName = GetRebaseName(path, resolvedPath) 441 } else { 442 dirPath, basePath := filepath.Split(path) 443 444 // if not follow symbol link, then resolve symbol link of parent dir 445 var resolvedDirPath string 446 resolvedDirPath, err = filepath.EvalSymlinks(dirPath) 447 if err != nil { 448 return 449 } 450 // resolvedDirPath will have been cleaned (no trailing path separators) so 451 // we can manually join it with the base path element. 452 resolvedPath = resolvedDirPath + string(filepath.Separator) + basePath 453 if hasTrailingPathSeparator(path) && 454 filepath.Base(path) != filepath.Base(resolvedPath) { 455 rebaseName = filepath.Base(path) 456 } 457 } 458 return resolvedPath, rebaseName, nil 459 } 460 461 // GetRebaseName normalizes and compares path and resolvedPath, 462 // return completed resolved path and rebased file name 463 func GetRebaseName(path, resolvedPath string) (string, string) { 464 // linkTarget will have been cleaned (no trailing path separators and dot) so 465 // we can manually join it with them 466 var rebaseName string 467 if specifiesCurrentDir(path) && 468 !specifiesCurrentDir(resolvedPath) { 469 resolvedPath += string(filepath.Separator) + "." 470 } 471 472 if hasTrailingPathSeparator(path) && 473 !hasTrailingPathSeparator(resolvedPath) { 474 resolvedPath += string(filepath.Separator) 475 } 476 477 if filepath.Base(path) != filepath.Base(resolvedPath) { 478 // In the case where the path had a trailing separator and a symlink 479 // evaluation has changed the last path component, we will need to 480 // rebase the name in the archive that is being copied to match the 481 // originally requested name. 482 rebaseName = filepath.Base(path) 483 } 484 return resolvedPath, rebaseName 485 }