// Copyright 2012-2016 Apcera Inc. All rights reserved.
// Source: github.com/apcera/util@v0.0.0-20180322191801-7a50bc84ee48/tarhelper/untar.go

package tarhelper

import (
	"archive/tar"
	"fmt"
	"io"
	"os"
	"os/user"
	"path/filepath"
	"runtime"
	"strconv"
	"strings"
	"syscall"
)

// Compression is the type of compression that an archive is using.
type Compression string

// Supported Compression values. NONE reads the source as a plain tar stream,
// DETECT asks DetectArchiveCompression to work out the format, and any other
// value is looked up by name among the registered decompressors.
//
// WindowsMaxPathLen is the longest absolute path that extraction will produce
// when running on Windows.
const (
	NONE   = Compression("")
	BZIP2  = Compression("bzip2")
	GZIP   = Compression("gzip")
	DETECT = Compression("detect")

	WindowsMaxPathLen = 260 // characters
)

// UntarCustomHandler is used to inject custom behavior for handling entries in
// a tar file. For more information, see the Untar.CustomHandlers description.
type UntarCustomHandler func(rootpath string, header *tar.Header, reader io.Reader) (bool, error)

// resolvedLink records one path component (src) encountered while resolving a
// destination directory, together with the location it resolved to (dst) after
// any symlink at that component was followed.
type resolvedLink struct {
	src string
	dst string
}

// Untar manages state of a TAR archive to be extracted.
type Untar struct {
	// The directory that the files will be extracted into. This will
	// be the root for all paths contained within the tar file.
	target string

	// The source reader.
	source io.Reader

	// A list of currently resolved links. This is used to ensure when creating
	// a file that follows through a symlink, we create the file relative to the
	// location of the AbsoluteRoot.
	resolvedLinks []resolvedLink

	// The AbsoluteRoot is intended to be the root of the target and allows us
	// to create files that follow through links that are absolute paths, but
	// ensure the file is created relative to the AbsoluteRoot and not the root
	// on the host system.
	AbsoluteRoot string

	// The Compression being used in this tar.
	Compression Compression

	// The archive/tar reader that we will use to extract each
	// element from the tar file. This will be set when Extract()
	// is called.
	archive *tar.Reader

	// Set to true if extraction should attempt to preserve
	// permissions as recorded in the tar file. If this is false then
	// files will be created with a default of 755 for directories and 644
	// for files.
	PreservePermissions bool

	// Set to true if extraction should attempt to restore owners of files
	// and directories from the archive. Any Uid/Gid over 500 will be set
	// to the MappedUserID/MappedGroupID setting. If this is set to false
	// it will default to all files going to the MappedUserID/MappedGroupID.
	PreserveOwners bool

	// SkipSpecialDevices can be used to skip extracting special devices defined
	// within the tarball. This includes things like character or block devices.
	SkipSpecialDevices bool

	// The default UID to set files with an owner over 500 to. If PreserveOwners
	// is false, this will be the UID assigned for all files in the archive.
	// This defaults to the UID of the current running user.
	MappedUserID int

	// The default GID to set files with an owner over 500 to. If PreserveOwners
	// is false, this will be the GID assigned for all files in the archive.
	// This defaults to the GID of the current running user.
	MappedGroupID int

	// IncludedPermissionMask is combined with the uploaded file mask as a way to
	// ensure a base level of permissions for all objects.
	IncludedPermissionMask os.FileMode

	// PathWhitelist provides a list of files that will only be extracted from the
	// provided tarball. If PathWhitelist is not set, then all files will be
	// allowed. If it is set, then only files matching the specified files
	// (/etc/file) or directories (/etc/dir/) will be allowed.
	PathWhitelist []string

	// OwnerMappingFunc is used to give the caller the ability to control the
	// mapping of UIDs in the tar into what they should be on the host. It is only
	// used when PreserveOwners is true. The function is passed in the UID of the
	// file being extracted and is expected to return a UID to use for the actual
	// file. It can also return an error if it is unable to choose a UID or the
	// UID is not allowed.
	OwnerMappingFunc func(int) (int, error)

	// GroupMappingFunc is used to give the caller the ability to control the
	// mapping of GIDs in the tar into what they should be on the host. It is only
	// used when PreserveOwners is true. The function is passed in the GID of the
	// file being extracted and is expected to return a GID to use for the actual
	// file. It can also return an error if it is unable to choose a GID or the
	// GID is not allowed.
	GroupMappingFunc func(int) (int, error)

	// CustomHandlers is used to allow the code calling tarhelper to inject custom
	// logic for how to handle certain entries within the tar file. The Untar
	// handler will loop over and call to these functions. They return a boolean
	// which should be true when the built in logic for handling the tar entry
	// should be skipped. They also return an error which will cause the untar
	// function to abort and bubble up the handler's error. The functions are
	// passed the root path where the tar is being extracted on disk, the
	// *tar.Header entry, and an io.Reader to the entry's contents (if it is a
	// file).
	CustomHandlers []UntarCustomHandler
}

// NewUntar returns an Untar to use to extract the contents of r into targetDir.
// Extraction is handled by Extract().
133 func NewUntar(r io.Reader, targetDir string) *Untar { 134 u := &Untar{ 135 source: r, 136 target: targetDir, 137 PreservePermissions: true, 138 PreserveOwners: false, 139 AbsoluteRoot: "/", 140 resolvedLinks: make([]resolvedLink, 0), 141 OwnerMappingFunc: defaultMappingFunc, 142 GroupMappingFunc: defaultMappingFunc, 143 } 144 145 // loop up the current user for mapping of files 146 // only do it if err != nil 147 if usr, err := user.Current(); err != nil { 148 if usr == nil { 149 u.MappedUserID = 500 150 u.MappedGroupID = 500 151 } else { 152 if u.MappedUserID, err = strconv.Atoi(usr.Uid); err != nil { 153 u.MappedUserID = 500 154 } 155 if u.MappedGroupID, err = strconv.Atoi(usr.Gid); err != nil { 156 u.MappedGroupID = 500 157 } 158 } 159 } else { 160 u.MappedUserID = 500 161 u.MappedGroupID = 500 162 } 163 164 return u 165 } 166 167 // Extract unpacks the tar reader that was passed into New(). This is 168 // broken out from new to give the caller time to set various 169 // settings in the Untar object. 
170 func (u *Untar) Extract() error { 171 // check for detect mode before the main setup, we'll change compression 172 // to the intended type and setup a new reader to re-read the header 173 switch u.Compression { 174 case NONE: 175 u.archive = tar.NewReader(u.source) 176 177 case DETECT: 178 arch, err := DetectArchiveCompression(u.source) 179 if err != nil { 180 return err 181 } 182 u.archive = arch 183 184 default: 185 // Look up the compression handler 186 comp, exists := decompressorTypes[string(u.Compression)] 187 if !exists { 188 return fmt.Errorf("unrecognized decompression type %q", u.Compression) 189 } 190 191 // Create the reader 192 arch, err := comp.NewReader(u.source) 193 if err != nil { 194 return err 195 } 196 defer func() { 197 if cl, ok := arch.(io.ReadCloser); ok { 198 cl.Close() 199 } 200 }() 201 u.archive = tar.NewReader(arch) 202 } 203 204 for { 205 header, err := u.archive.Next() 206 if err == io.EOF { 207 // EOF, ok, break to return 208 break 209 } 210 if err != nil { 211 // See note on logging above. 212 return err 213 } 214 215 err = u.processEntry(header) 216 if err != nil { 217 // See note on logging above. 218 return err 219 } 220 } 221 222 return nil 223 } 224 225 // Checks the security of the given name. Anything that looks 226 // fishy will be rejected. 227 func checkName(name string) error { 228 if len(name) == 0 { 229 return fmt.Errorf("No name given for tar element.") 230 } 231 comp := strings.Split(name, string(os.PathSeparator)) 232 if len(comp) > 0 && comp[0] == "" { 233 return fmt.Errorf("No absolute paths allowed.") 234 } 235 for i, c := range comp { 236 switch { 237 case c == "" && i != len(comp)-1: 238 // don't allow an empty name, unless it is the last element... 
handles 239 // cases where we may have "./" come in as the name 240 return fmt.Errorf("Empty name in file path.") 241 case c == "..": 242 return fmt.Errorf("Double dots not allowed in path.") 243 } 244 } 245 return nil 246 } 247 248 // Checks the security of the given link name. Anything that looks fishy 249 // will be rejected. 250 func checkLinkName(dest, src, targetBase string) error { 251 if len(dest) == 0 { 252 return fmt.Errorf("No name given for tar element.") 253 } 254 return nil 255 } 256 257 // Processes a single header/body combination from the tar 258 // archive being processed in Extract() above. 259 func (u *Untar) processEntry(header *tar.Header) error { 260 // Check the security of the name being given to us by tar. 261 // If the name contains any bad things then we force 262 // an error in order to protect ourselves. 263 if err := checkName(header.Name); err != nil { 264 return err 265 } 266 267 // Ensure that the file is allowed against the current whitelist, if one is 268 // specified. 269 if !u.checkEntryAgainstWhitelist(header) { 270 return nil 271 } 272 273 name := filepath.Join(u.target, header.Name) 274 275 // resolve the destination and then reset the name based on the resolution 276 destDir, err := u.resolveDestination(filepath.Dir(name)) 277 if err != nil { 278 return err 279 } 280 281 name = filepath.Join(destDir, filepath.Base(name)) 282 283 // The path length of the extracted file might exceed Windows maximum of 284 // 260 chars. 
285 if runtime.GOOS == "windows" { 286 absPath, err := filepath.Abs(name) 287 if err != nil { 288 return fmt.Errorf("failed to validate path length of extracted file %q: %v", name, err) 289 } 290 291 if len(absPath) > WindowsMaxPathLen { 292 return fmt.Errorf("path length of extracted file is %d chars (windows max: %d chars)", len(absPath), WindowsMaxPathLen) 293 } 294 } 295 296 // look at the type to see how we want to remove existing entries 297 switch { 298 case header.Typeflag == tar.TypeDir: 299 // if we are extracting a directory, we want to see if the directory 300 // already exists... if it exists but isn't a directory, we need 301 // to remove it 302 fi, _ := os.Stat(name) 303 if fi != nil { 304 if !fi.IsDir() { 305 os.RemoveAll(name) 306 } 307 } 308 default: 309 os.RemoveAll(name) 310 } 311 312 // process the uid/gid ownership 313 uid, gid := u.MappedUserID, u.MappedGroupID 314 if u.PreserveOwners { 315 if uid, err = u.OwnerMappingFunc(header.Uid); err != nil { 316 return fmt.Errorf("failed to map UID for file: %v", err) 317 } 318 if gid, err = u.GroupMappingFunc(header.Gid); err != nil { 319 return fmt.Errorf("failed to map GID for file: %v", err) 320 } 321 } 322 header.Uid, header.Gid = uid, gid 323 324 // Loop over custom handlers to see if any of them should be used to process the entry. 
325 for _, handler := range u.CustomHandlers { 326 var reader io.Reader 327 if header.Typeflag == tar.TypeReg || header.Typeflag == tar.TypeRegA { 328 reader = u.archive 329 } 330 bypass, err := handler(u.target, header, reader) 331 if err != nil { 332 return err 333 } 334 if bypass { 335 return nil 336 } 337 } 338 339 // handle individual types 340 switch { 341 case header.Typeflag == tar.TypeDir: 342 // Handle directories 343 // don't return error if it already exists 344 mode := os.FileMode(0755) 345 if u.PreservePermissions { 346 mode = header.FileInfo().Mode() | u.IncludedPermissionMask 347 } 348 349 // create the directory 350 err := os.MkdirAll(name, mode) 351 if err != nil { 352 return err 353 } 354 355 // Perform a chmod after creation to ensure modes are applied directly, 356 // regardless of umask. 357 if err := os.Chmod(name, mode); err != nil { 358 return err 359 } 360 361 case header.Typeflag == tar.TypeSymlink: 362 // Handle symlinks 363 err := checkLinkName(header.Linkname, name, u.target) 364 if err != nil { 365 return err 366 } 367 368 // have seen links to themselves 369 if name == header.Linkname { 370 break 371 } 372 373 // make the link 374 if err := os.Symlink(header.Linkname, name); err != nil { 375 return err 376 } 377 378 case header.Typeflag == tar.TypeLink: 379 // handle creation of hard links 380 if err := checkLinkName(header.Linkname, name, u.target); err != nil { 381 return err 382 } 383 384 // find the full path, need to ensure it exists 385 link := filepath.Join(u.target, header.Linkname) 386 387 // do the link... 
no permissions or owners, those carry over 388 if err := os.Link(link, name); err != nil { 389 return err 390 } 391 392 case header.Typeflag == tar.TypeReg || header.Typeflag == tar.TypeRegA: 393 flags := os.O_WRONLY | os.O_CREATE | os.O_EXCL 394 // determine the mode to use 395 mode := os.FileMode(0644) 396 if u.PreservePermissions { 397 mode = header.FileInfo().Mode() | u.IncludedPermissionMask 398 } 399 400 // open the file 401 f, err := os.OpenFile(name, flags, mode) 402 if err != nil { 403 return err 404 } 405 defer f.Close() 406 407 // Perform a chmod after creation to ensure modes are applied directly, 408 // regardless of umask. 409 if err := os.Chmod(name, mode); err != nil { 410 return err 411 } 412 413 // SETUID/SETGID needs to be defered... 414 // The standard chown call is after handling the files, since we want to 415 // just have it one place, and after the file exists. However, chown 416 // will clear the setuid/setgid bit on a file. 417 if header.Mode&c_ISUID != 0 { 418 defer lazyChmod(name, os.ModeSetuid) 419 } 420 if header.Mode&c_ISGID != 0 { 421 defer lazyChmod(name, os.ModeSetgid) 422 } 423 424 // copy the contents 425 n, err := io.Copy(f, u.archive) 426 if err != nil { 427 return err 428 } else if n != header.Size { 429 return fmt.Errorf("Short write while copying file %s", name) 430 } 431 432 case header.Typeflag == tar.TypeBlock || header.Typeflag == tar.TypeChar || header.Typeflag == tar.TypeFifo: 433 // check to see if the flag to skip character/block devices is set, and 434 // simply return if it is 435 if u.SkipSpecialDevices { 436 return nil 437 } 438 439 // determine how to OR the mode 440 devmode := uint32(0) 441 switch header.Typeflag { 442 case tar.TypeChar: 443 devmode = syscall.S_IFCHR 444 case tar.TypeBlock: 445 devmode = syscall.S_IFBLK 446 case tar.TypeFifo: 447 devmode = syscall.S_IFIFO 448 } 449 450 // determine the mode to use 451 mode := os.FileMode(0644) 452 if u.PreservePermissions { 453 mode = header.FileInfo().Mode() | 
u.IncludedPermissionMask 454 } 455 456 // syscall to mknod 457 dev := makedev(header.Devmajor, header.Devminor) 458 if err := osMknod(name, devmode|uint32(mode), dev); err != nil { 459 return err 460 } 461 462 // Perform a chmod after creation to ensure modes are applied directly, 463 // regardless of umask. 464 if err := os.Chmod(name, mode|os.FileMode(devmode)); err != nil { 465 return err 466 } 467 468 default: 469 return fmt.Errorf("Unrecognized type: %d", header.Typeflag) 470 } 471 472 // apply the uid/gid 473 switch header.Typeflag { 474 case tar.TypeSymlink: 475 os.Lchown(name, header.Uid, header.Gid) 476 case tar.TypeLink: 477 // don't chown on hard links or symlinks. doing this also removes setuid 478 // from mode and the hard link will already pick up the same owner 479 default: 480 os.Chown(name, header.Uid, header.Gid) 481 } 482 483 return nil 484 } 485 486 func (u *Untar) resolveDestination(name string) (string, error) { 487 pathParts := strings.Split(name, string(os.PathSeparator)) 488 489 // On Windows, Split will remove the '\' from "C:\". This would cause 490 // Extract to extract to the wrong directory. Here we detect this issue and 491 // insert the missing trailing '\' when necessary. 492 if runtime.GOOS == "windows" && filepath.IsAbs(name) { 493 pathParts[0] += string(os.PathSeparator) 494 } 495 496 // walk the path parts to find at what point the resolvedLinks deviates 497 i := 0 498 for i, _ = range pathParts { 499 if (i < len(u.resolvedLinks)) && pathParts[i] == u.resolvedLinks[i].src { 500 continue 501 } 502 break 503 } 504 505 // truncate the slice to only the matching pieces 506 u.resolvedLinks = u.resolvedLinks[0:i] 507 508 // special handling for an empty array... 509 // normally it begins with the previous dest, but if it is empty we need to 510 // start with resolving the first path piece 511 if len(u.resolvedLinks) == 0 { 512 p := pathParts[i] 513 514 if p == "" { 515 // Path shouldn't start empty; resolve it from the root. 
516 if runtime.GOOS == "windows" { 517 p = filepath.VolumeName(name) 518 } else { 519 p = string(os.PathSeparator) 520 } 521 } 522 523 dst, err := u.convertToDestination(p) 524 if err != nil { 525 return "", err 526 } 527 528 u.resolvedLinks = append( 529 u.resolvedLinks, 530 resolvedLink{src: pathParts[i], dst: dst}) 531 i++ 532 } 533 534 // build up the resolution for the rest of the pieces 535 for j := i; j < len(pathParts); j++ { 536 testPath := filepath.Join( 537 u.resolvedLinks[len(u.resolvedLinks)-1].dst, 538 pathParts[j]) 539 540 dst, err := u.convertToDestination(testPath) 541 if err != nil { 542 return "", err 543 } 544 545 u.resolvedLinks = append( 546 u.resolvedLinks, 547 resolvedLink{src: pathParts[j], dst: dst}) 548 } 549 550 // the last entry is the full resolution 551 return u.resolvedLinks[len(u.resolvedLinks)-1].dst, nil 552 } 553 554 func (u *Untar) convertToDestination(dir string) (string, error) { 555 // Lstat the current element to see if it is a symlink 556 if dir == "" { 557 dir = "." 558 } 559 lstat, err := os.Lstat(dir) 560 if err != nil { 561 // If the error is that the path doesn't exist, we will go ahead and create 562 // it. Normally, tar files have a directory entry before it mentions files 563 // in that directory. This isn't always true. Case in point, Darwin's "tar" 564 // vs its "gnutar", "tar" doesn't if you just do "tar -czf foo.tar foo" 565 // where foo is a directory with files in it. It will reference the files in 566 // "foo" and never "foo" itself. 567 // 568 // NOTE: by the time this is executed, the location of the directory has 569 // already been validated as safe. 
570 if os.IsNotExist(err) { 571 if err := u.recursivelyCreateDir(dir); err != nil { 572 return "", err 573 } 574 lstat, err = os.Lstat(dir) 575 } 576 } 577 if err != nil { 578 return "", err 579 } 580 581 // check symlink mode 582 if lstat.Mode()&os.ModeSymlink == os.ModeSymlink { 583 // it is a symlink, now we want to read it and store the dest 584 link, err := os.Readlink(dir) 585 if err != nil { 586 return "", err 587 } 588 589 // if the path is absolute, we want it based on the AbsoluteRoot 590 if filepath.IsAbs(link) { 591 link = filepath.Join(u.AbsoluteRoot, ".", link) 592 } else { 593 // clean up the path to be a more complete dest from the target 594 link = filepath.Join(filepath.Dir(dir), ".", link) 595 } 596 597 // return the link 598 return link, nil 599 } 600 601 // not a symlink, so return the dir 602 return dir, nil 603 } 604 605 // recursivelyCreateDir is used to recursively create multiple elements of a 606 // path individually to ensure the uid/gid mapping functions get applied and 607 // they have the proper owners. 608 func (u *Untar) recursivelyCreateDir(dir string) error { 609 // process the uid/gid ownership 610 uid := u.MappedUserID 611 gid := u.MappedGroupID 612 if u.PreserveOwners { 613 var err error 614 if uid, err = u.OwnerMappingFunc(uid); err != nil { 615 return fmt.Errorf("failed to map UID for file: %v", err) 616 } 617 if gid, err = u.GroupMappingFunc(gid); err != nil { 618 return fmt.Errorf("failed to map GID for file: %v", err) 619 } 620 } 621 622 abs := filepath.IsAbs(dir) 623 parts := strings.Split(dir, string(os.PathSeparator)) 624 if abs { 625 parts = parts[1:] 626 } 627 628 for i := range parts { 629 p := filepath.Join(parts[:i+1]...) 630 if abs { 631 p = string(os.PathSeparator) + p 632 } 633 634 if err := os.Mkdir(p, os.FileMode(0755)); err != nil { 635 if os.IsExist(err) { 636 continue 637 } 638 return err 639 } 640 // We don't error check on chown incase the process is 641 // unprivledged. 
Additionally, only chown when we actually created it. 642 os.Chown(p, uid, gid) 643 } 644 return nil 645 } 646 647 // checkEntryAgainstWhitelist will check if the specified file should be allowed 648 // to be extracted against the current PathWhitelist. If no PathWhitelist is 649 // allowed, then it will allow all files. 650 func (u *Untar) checkEntryAgainstWhitelist(header *tar.Header) bool { 651 if len(u.PathWhitelist) == 0 { 652 return true 653 } 654 655 name := "/" + filepath.Clean(header.Name) 656 657 for _, p := range u.PathWhitelist { 658 // Whitelist: "/foo" File: "/foo" 659 if p == name { 660 return true 661 } 662 663 if strings.HasSuffix(p, "/") { 664 // Whitelist: "/usr/bin/" Dir: "/usr/bin" 665 if p == name+"/" && header.Typeflag == tar.TypeDir { 666 return true 667 } 668 669 // Whitelist: "/usr/bin/" File: "/usr/bin/bash" 670 if strings.HasPrefix(name, p) { 671 return true 672 } 673 } 674 } 675 676 return false 677 } 678 679 func lazyChmod(name string, m os.FileMode) { 680 if fi, err := os.Stat(name); err == nil { 681 os.Chmod(name, fi.Mode()|m) 682 } 683 }