// Origin: github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/engine/pkg/archive/copy.go,
// package archive (import path "github.com/docker/docker/pkg/archive").
// Besides the standard library packages archive/tar, errors, io, os,
// path/filepath and strings, this file depends on
// github.com/docker/docker/pkg/system and github.com/sirupsen/logrus.

// Errors used or returned by this file.
var (
	ErrNotDirectory      = errors.New("not a directory")
	ErrDirNotExists      = errors.New("no such directory")
	ErrCannotCopyDir     = errors.New("cannot copy directory")
	ErrInvalidCopySource = errors.New("invalid copy source content")
)

// PreserveTrailingDotOrSeparator returns the given cleaned path (after
// processing using any utility functions from the path or filepath stdlib
// packages) and appends a trailing `/.` or `/` if its corresponding original
// path (from before being processed by utility functions from the path or
// filepath stdlib packages) ends with a trailing `/.` or `/`. If the cleaned
// path already ends in a `.` path segment, then another is not added. If the
// clean path already ends in the separator, then another is not added.
//
// Both paths are first converted to use sep in place of every "/", and the
// returned path uses sep as well.
func PreserveTrailingDotOrSeparator(cleanedPath string, originalPath string, sep byte) string {
	// Ensure paths are in platform semantics.
	cleanedPath = strings.ReplaceAll(cleanedPath, "/", string(sep))
	originalPath = strings.ReplaceAll(originalPath, "/", string(sep))

	if !specifiesCurrentDir(cleanedPath) && specifiesCurrentDir(originalPath) {
		if !hasTrailingPathSeparator(cleanedPath, sep) {
			// Add a separator if it doesn't already end with one (a cleaned
			// path would only end in a separator if it is the root).
			cleanedPath += string(sep)
		}
		cleanedPath += "."
	}

	if !hasTrailingPathSeparator(cleanedPath, sep) && hasTrailingPathSeparator(originalPath, sep) {
		cleanedPath += string(sep)
	}

	return cleanedPath
}

// assertsDirectory returns whether the given path is asserted to be a
// directory, i.e., the path ends with a trailing separator (sep) or its last
// path segment is `.`.
func assertsDirectory(path string, sep byte) bool {
	return hasTrailingPathSeparator(path, sep) || specifiesCurrentDir(path)
}

// hasTrailingPathSeparator returns whether the given path is non-empty and
// its last byte is the separator sep.
func hasTrailingPathSeparator(path string, sep byte) bool {
	return len(path) > 0 && path[len(path)-1] == sep
}

// specifiesCurrentDir returns whether the given path specifies
// a "current directory", i.e., the last path segment is `.`.
//
// The check relies on filepath.Base, so it follows the host platform's
// separator rules rather than a caller-supplied separator.
func specifiesCurrentDir(path string) bool {
	return filepath.Base(path) == "."
}

// SplitPathDirEntry splits the given path between its directory name and its
// basename by first cleaning the path but preserves a trailing "." if the
// original path specified the current directory.
func SplitPathDirEntry(path string) (dir, base string) {
	cleanedPath := filepath.Clean(filepath.FromSlash(path))

	// filepath.Clean drops a trailing "." segment; put it back so that the
	// returned base is "." and dir is the directory the caller named.
	if specifiesCurrentDir(path) {
		cleanedPath += string(os.PathSeparator) + "."
	}

	return filepath.Dir(cleanedPath), filepath.Base(cleanedPath)
}

// TarResource archives the resource described by the given CopyInfo to a Tar
// archive. A non-nil error is returned if sourcePath does not exist or is
// asserted to be a directory but exists as another type of file.
//
// This function acts as a convenient wrapper around TarWithOptions, which
// requires a directory as the source path. TarResource accepts either a
// directory or a file path and correctly sets the Tar options.
90 func TarResource(sourceInfo CopyInfo) (content io.ReadCloser, err error) { 91 return TarResourceRebase(sourceInfo.Path, sourceInfo.RebaseName) 92 } 93 94 // TarResourceRebase is like TarResource but renames the first path element of 95 // items in the resulting tar archive to match the given rebaseName if not "". 96 func TarResourceRebase(sourcePath, rebaseName string) (content io.ReadCloser, err error) { 97 sourcePath = normalizePath(sourcePath) 98 if _, err = os.Lstat(sourcePath); err != nil { 99 // Catches the case where the source does not exist or is not a 100 // directory if asserted to be a directory, as this also causes an 101 // error. 102 return 103 } 104 105 // Separate the source path between its directory and 106 // the entry in that directory which we are archiving. 107 sourceDir, sourceBase := SplitPathDirEntry(sourcePath) 108 opts := TarResourceRebaseOpts(sourceBase, rebaseName) 109 110 logrus.Debugf("copying %q from %q", sourceBase, sourceDir) 111 return TarWithOptions(sourceDir, opts) 112 } 113 114 // TarResourceRebaseOpts does not preform the Tar, but instead just creates the rebase 115 // parameters to be sent to TarWithOptions (the TarOptions struct) 116 func TarResourceRebaseOpts(sourceBase string, rebaseName string) *TarOptions { 117 filter := []string{sourceBase} 118 return &TarOptions{ 119 Compression: Uncompressed, 120 IncludeFiles: filter, 121 IncludeSourceDir: true, 122 RebaseNames: map[string]string{ 123 sourceBase: rebaseName, 124 }, 125 } 126 } 127 128 // CopyInfo holds basic info about the source 129 // or destination path of a copy operation. 130 type CopyInfo struct { 131 Path string 132 Exists bool 133 IsDir bool 134 RebaseName string 135 } 136 137 // CopyInfoSourcePath stats the given path to create a CopyInfo 138 // struct representing that resource for the source of an archive copy 139 // operation. The given path should be an absolute local path. 
A source path 140 // has all symlinks evaluated that appear before the last path separator ("/" 141 // on Unix). As it is to be a copy source, the path must exist. 142 func CopyInfoSourcePath(path string, followLink bool) (CopyInfo, error) { 143 // normalize the file path and then evaluate the symbol link 144 // we will use the target file instead of the symbol link if 145 // followLink is set 146 path = normalizePath(path) 147 148 resolvedPath, rebaseName, err := ResolveHostSourcePath(path, followLink) 149 if err != nil { 150 return CopyInfo{}, err 151 } 152 153 stat, err := os.Lstat(resolvedPath) 154 if err != nil { 155 return CopyInfo{}, err 156 } 157 158 return CopyInfo{ 159 Path: resolvedPath, 160 Exists: true, 161 IsDir: stat.IsDir(), 162 RebaseName: rebaseName, 163 }, nil 164 } 165 166 // CopyInfoDestinationPath stats the given path to create a CopyInfo 167 // struct representing that resource for the destination of an archive copy 168 // operation. The given path should be an absolute local path. 169 func CopyInfoDestinationPath(path string) (info CopyInfo, err error) { 170 maxSymlinkIter := 10 // filepath.EvalSymlinks uses 255, but 10 already seems like a lot. 171 path = normalizePath(path) 172 originalPath := path 173 174 stat, err := os.Lstat(path) 175 176 if err == nil && stat.Mode()&os.ModeSymlink == 0 { 177 // The path exists and is not a symlink. 178 return CopyInfo{ 179 Path: path, 180 Exists: true, 181 IsDir: stat.IsDir(), 182 }, nil 183 } 184 185 // While the path is a symlink. 186 for n := 0; err == nil && stat.Mode()&os.ModeSymlink != 0; n++ { 187 if n > maxSymlinkIter { 188 // Don't follow symlinks more than this arbitrary number of times. 189 return CopyInfo{}, errors.New("too many symlinks in " + originalPath) 190 } 191 192 // The path is a symbolic link. We need to evaluate it so that the 193 // destination of the copy operation is the link target and not the 194 // link itself. 
This is notably different than CopyInfoSourcePath which 195 // only evaluates symlinks before the last appearing path separator. 196 // Also note that it is okay if the last path element is a broken 197 // symlink as the copy operation should create the target. 198 var linkTarget string 199 200 linkTarget, err = os.Readlink(path) 201 if err != nil { 202 return CopyInfo{}, err 203 } 204 205 if !system.IsAbs(linkTarget) { 206 // Join with the parent directory. 207 dstParent, _ := SplitPathDirEntry(path) 208 linkTarget = filepath.Join(dstParent, linkTarget) 209 } 210 211 path = linkTarget 212 stat, err = os.Lstat(path) 213 } 214 215 if err != nil { 216 // It's okay if the destination path doesn't exist. We can still 217 // continue the copy operation if the parent directory exists. 218 if !os.IsNotExist(err) { 219 return CopyInfo{}, err 220 } 221 222 // Ensure destination parent dir exists. 223 dstParent, _ := SplitPathDirEntry(path) 224 225 parentDirStat, err := os.Stat(dstParent) 226 if err != nil { 227 return CopyInfo{}, err 228 } 229 if !parentDirStat.IsDir() { 230 return CopyInfo{}, ErrNotDirectory 231 } 232 233 return CopyInfo{Path: path}, nil 234 } 235 236 // The path exists after resolving symlinks. 237 return CopyInfo{ 238 Path: path, 239 Exists: true, 240 IsDir: stat.IsDir(), 241 }, nil 242 } 243 244 // PrepareArchiveCopy prepares the given srcContent archive, which should 245 // contain the archived resource described by srcInfo, to the destination 246 // described by dstInfo. Returns the possibly modified content archive along 247 // with the path to the destination directory which it should be extracted to. 
248 func PrepareArchiveCopy(srcContent io.Reader, srcInfo, dstInfo CopyInfo) (dstDir string, content io.ReadCloser, err error) { 249 // Ensure in platform semantics 250 srcInfo.Path = normalizePath(srcInfo.Path) 251 dstInfo.Path = normalizePath(dstInfo.Path) 252 253 // Separate the destination path between its directory and base 254 // components in case the source archive contents need to be rebased. 255 dstDir, dstBase := SplitPathDirEntry(dstInfo.Path) 256 _, srcBase := SplitPathDirEntry(srcInfo.Path) 257 258 switch { 259 case dstInfo.Exists && dstInfo.IsDir: 260 // The destination exists as a directory. No alteration 261 // to srcContent is needed as its contents can be 262 // simply extracted to the destination directory. 263 return dstInfo.Path, io.NopCloser(srcContent), nil 264 case dstInfo.Exists && srcInfo.IsDir: 265 // The destination exists as some type of file and the source 266 // content is a directory. This is an error condition since 267 // you cannot copy a directory to an existing file location. 268 return "", nil, ErrCannotCopyDir 269 case dstInfo.Exists: 270 // The destination exists as some type of file and the source content 271 // is also a file. The source content entry will have to be renamed to 272 // have a basename which matches the destination path's basename. 273 if len(srcInfo.RebaseName) != 0 { 274 srcBase = srcInfo.RebaseName 275 } 276 return dstDir, RebaseArchiveEntries(srcContent, srcBase, dstBase), nil 277 case srcInfo.IsDir: 278 // The destination does not exist and the source content is an archive 279 // of a directory. The archive should be extracted to the parent of 280 // the destination path instead, and when it is, the directory that is 281 // created as a result should take the name of the destination path. 282 // The source content entries will have to be renamed to have a 283 // basename which matches the destination path's basename. 
284 if len(srcInfo.RebaseName) != 0 { 285 srcBase = srcInfo.RebaseName 286 } 287 return dstDir, RebaseArchiveEntries(srcContent, srcBase, dstBase), nil 288 case assertsDirectory(dstInfo.Path, os.PathSeparator): 289 // The destination does not exist and is asserted to be created as a 290 // directory, but the source content is not a directory. This is an 291 // error condition since you cannot create a directory from a file 292 // source. 293 return "", nil, ErrDirNotExists 294 default: 295 // The last remaining case is when the destination does not exist, is 296 // not asserted to be a directory, and the source content is not an 297 // archive of a directory. It this case, the destination file will need 298 // to be created when the archive is extracted and the source content 299 // entry will have to be renamed to have a basename which matches the 300 // destination path's basename. 301 if len(srcInfo.RebaseName) != 0 { 302 srcBase = srcInfo.RebaseName 303 } 304 return dstDir, RebaseArchiveEntries(srcContent, srcBase, dstBase), nil 305 } 306 307 } 308 309 // RebaseArchiveEntries rewrites the given srcContent archive replacing 310 // an occurrence of oldBase with newBase at the beginning of entry names. 311 func RebaseArchiveEntries(srcContent io.Reader, oldBase, newBase string) io.ReadCloser { 312 if oldBase == string(os.PathSeparator) { 313 // If oldBase specifies the root directory, use an empty string as 314 // oldBase instead so that newBase doesn't replace the path separator 315 // that all paths will start with. 316 oldBase = "" 317 } 318 319 rebased, w := io.Pipe() 320 321 go func() { 322 srcTar := tar.NewReader(srcContent) 323 rebasedTar := tar.NewWriter(w) 324 325 for { 326 hdr, err := srcTar.Next() 327 if err == io.EOF { 328 // Signals end of archive. 
329 rebasedTar.Close() 330 w.Close() 331 return 332 } 333 if err != nil { 334 w.CloseWithError(err) 335 return 336 } 337 338 // srcContent tar stream, as served by TarWithOptions(), is 339 // definitely in PAX format, but tar.Next() mistakenly guesses it 340 // as USTAR, which creates a problem: if the newBase is >100 341 // characters long, WriteHeader() returns an error like 342 // "archive/tar: cannot encode header: Format specifies USTAR; and USTAR cannot encode Name=...". 343 // 344 // To fix, set the format to PAX here. See docker/for-linux issue #484. 345 hdr.Format = tar.FormatPAX 346 hdr.Name = strings.Replace(hdr.Name, oldBase, newBase, 1) 347 if hdr.Typeflag == tar.TypeLink { 348 hdr.Linkname = strings.Replace(hdr.Linkname, oldBase, newBase, 1) 349 } 350 351 if err = rebasedTar.WriteHeader(hdr); err != nil { 352 w.CloseWithError(err) 353 return 354 } 355 356 // Ignoring GoSec G110. See https://github.com/securego/gosec/pull/433 357 // and https://cure53.de/pentest-report_opa.pdf, which recommends to 358 // replace io.Copy with io.CopyN7. The latter allows to specify the 359 // maximum number of bytes that should be read. By properly defining 360 // the limit, it can be assured that a GZip compression bomb cannot 361 // easily cause a Denial-of-Service. 362 // After reviewing with @tonistiigi and @cpuguy83, this should not 363 // affect us, because here we do not read into memory, hence should 364 // not be vulnerable to this code consuming memory. 365 //nolint:gosec // G110: Potential DoS vulnerability via decompression bomb (gosec) 366 if _, err = io.Copy(rebasedTar, srcTar); err != nil { 367 w.CloseWithError(err) 368 return 369 } 370 } 371 }() 372 373 return rebased 374 } 375 376 // TODO @gupta-ak. These might have to be changed in the future to be 377 // continuity driver aware as well to support LCOW. 378 379 // CopyResource performs an archive copy from the given source path to the 380 // given destination path. 
The source path MUST exist and the destination 381 // path's parent directory must exist. 382 func CopyResource(srcPath, dstPath string, followLink bool) error { 383 var ( 384 srcInfo CopyInfo 385 err error 386 ) 387 388 // Ensure in platform semantics 389 srcPath = normalizePath(srcPath) 390 dstPath = normalizePath(dstPath) 391 392 // Clean the source and destination paths. 393 srcPath = PreserveTrailingDotOrSeparator(filepath.Clean(srcPath), srcPath, os.PathSeparator) 394 dstPath = PreserveTrailingDotOrSeparator(filepath.Clean(dstPath), dstPath, os.PathSeparator) 395 396 if srcInfo, err = CopyInfoSourcePath(srcPath, followLink); err != nil { 397 return err 398 } 399 400 content, err := TarResource(srcInfo) 401 if err != nil { 402 return err 403 } 404 defer content.Close() 405 406 return CopyTo(content, srcInfo, dstPath) 407 } 408 409 // CopyTo handles extracting the given content whose 410 // entries should be sourced from srcInfo to dstPath. 411 func CopyTo(content io.Reader, srcInfo CopyInfo, dstPath string) error { 412 // The destination path need not exist, but CopyInfoDestinationPath will 413 // ensure that at least the parent directory exists. 414 dstInfo, err := CopyInfoDestinationPath(normalizePath(dstPath)) 415 if err != nil { 416 return err 417 } 418 419 dstDir, copyArchive, err := PrepareArchiveCopy(content, srcInfo, dstInfo) 420 if err != nil { 421 return err 422 } 423 defer copyArchive.Close() 424 425 options := &TarOptions{ 426 NoLchown: true, 427 NoOverwriteDirNonDir: true, 428 } 429 430 return Untar(copyArchive, dstDir, options) 431 } 432 433 // ResolveHostSourcePath decides real path need to be copied with parameters such as 434 // whether to follow symbol link or not, if followLink is true, resolvedPath will return 435 // link target of any symbol link file, else it will only resolve symlink of directory 436 // but return symbol link file itself without resolving. 
437 func ResolveHostSourcePath(path string, followLink bool) (resolvedPath, rebaseName string, err error) { 438 if followLink { 439 resolvedPath, err = filepath.EvalSymlinks(path) 440 if err != nil { 441 return 442 } 443 444 resolvedPath, rebaseName = GetRebaseName(path, resolvedPath) 445 } else { 446 dirPath, basePath := filepath.Split(path) 447 448 // if not follow symbol link, then resolve symbol link of parent dir 449 var resolvedDirPath string 450 resolvedDirPath, err = filepath.EvalSymlinks(dirPath) 451 if err != nil { 452 return 453 } 454 // resolvedDirPath will have been cleaned (no trailing path separators) so 455 // we can manually join it with the base path element. 456 resolvedPath = resolvedDirPath + string(filepath.Separator) + basePath 457 if hasTrailingPathSeparator(path, os.PathSeparator) && 458 filepath.Base(path) != filepath.Base(resolvedPath) { 459 rebaseName = filepath.Base(path) 460 } 461 } 462 return resolvedPath, rebaseName, nil 463 } 464 465 // GetRebaseName normalizes and compares path and resolvedPath, 466 // return completed resolved path and rebased file name 467 func GetRebaseName(path, resolvedPath string) (string, string) { 468 // linkTarget will have been cleaned (no trailing path separators and dot) so 469 // we can manually join it with them 470 var rebaseName string 471 if specifiesCurrentDir(path) && 472 !specifiesCurrentDir(resolvedPath) { 473 resolvedPath += string(filepath.Separator) + "." 474 } 475 476 if hasTrailingPathSeparator(path, os.PathSeparator) && 477 !hasTrailingPathSeparator(resolvedPath, os.PathSeparator) { 478 resolvedPath += string(filepath.Separator) 479 } 480 481 if filepath.Base(path) != filepath.Base(resolvedPath) { 482 // In the case where the path had a trailing separator and a symlink 483 // evaluation has changed the last path component, we will need to 484 // rebase the name in the archive that is being copied to match the 485 // originally requested name. 
486 rebaseName = filepath.Base(path) 487 } 488 return resolvedPath, rebaseName 489 }