github.com/apcera/util@v0.0.0-20180322191801-7a50bc84ee48/tarhelper/tar.go (about) 1 // Copyright 2012-2016 Apcera Inc. All rights reserved. 2 3 package tarhelper 4 5 import ( 6 "archive/tar" 7 "compress/gzip" 8 "fmt" 9 "io" 10 "io/ioutil" 11 "os" 12 "path" 13 "path/filepath" 14 "regexp" 15 "strings" 16 ) 17 18 // User options enumeration type. This encodes the control options provided 19 // by user. 20 type UserOption int 21 22 // DirStack tracks circular symbolic links for the dereference archive option. 23 // Declaring a type here to highlight the semantics. 24 type DirStack []string 25 26 // ignoreInfo expands the idea of excluding a path by also specifying metadata 27 // about the regexp and how to process a match. 28 type ignoreInfo struct { 29 // regexp is the regular expression responsible for deciding matches. 30 regexp *regexp.Regexp 31 32 // exclude specifies whether or not the matched file should be excluded or 33 // included. This allows subsequent matches to reinclude files previously 34 // excluded. 35 exclude bool 36 37 // dirOnly will consider the regexp a match only if it is also a directory. 38 dirOnly bool 39 } 40 41 // TarCustomHandler are used to inject custom behavior for handling file entries 42 // going into a tar file. For more information, see Tar.CustomerHandlers 43 // description. 44 type TarCustomHandler func(fullpath string, fi os.FileInfo, header *tar.Header) (bool, error) 45 46 // TarCustomHook can inject additional header and file data into the archive. For 47 // more information, see Tar.PrefixHook and Tar.SuffixHook. 48 type TarCustomHook func(archive *tar.Writer) error 49 50 // Tar manages state for a TAR archive. 51 type Tar struct { 52 target string 53 54 // The destination writer 55 dest io.Writer 56 57 // The archive/tar reader that we will use to extract each 58 // element from the tar file. This will be set when Extract() 59 // is called. 60 archive *tar.Writer 61 62 // The Compression being used in this tar. 63 Compression Compression 64 65 // Set to true if archiving should attempt to preserve 66 // permissions as it was on the filesystem. If this is false then 67 // files will be archived with basic file/directory permissions. 68 IncludePermissions bool 69 70 // Set to true to perserve ownership of files and directories. If set to 71 // false, the Uid and Gid will be set as 500, which is the first Uid/Gid 72 // reserved for normal users. 73 IncludeOwners bool 74 75 // ignorePaths contains any paths that a user may want to exclude from the 76 // tar. Anything included in any paths set on this field will not be 77 // included in the tar. 78 ignorePaths []ignoreInfo 79 80 // If set, this will be a virtual path that is prepended to the 81 // file location. This allows the target to be under a temp directory 82 // but have it packaged as though it was under another directory, such as 83 // taring /tmp/build, and having 84 // /tmp/build/bin/foo be /var/lib/build/bin/foo 85 // in the tar archive. 86 VirtualPath string 87 88 // This is used to track potential hard links. We check the number of links 89 // and push the inode on here when archiving to see if we run across the 90 // inode again later. 91 hardLinks map[uint64]string 92 93 // OwnerMappingFunc is used to give the caller the ability to control the 94 // mapping of UIDs in the tar into what they should be on the host. The 95 // function is only used when IncludeOwners is true. The function is passed in 96 // the UID of the file on the filesystem and is expected to return a UID to 97 // use within the tar file. It can also return an error if it is unable to 98 // choose a UID or the UID is not allowed. 99 OwnerMappingFunc func(int) (int, error) 100 101 // GroupMappingFunc is used to give the caller the ability to control the 102 // mapping of GIDs in the tar into what they should be on the host. The 103 // function is only used when IncludeOwners is true. The function is passed in 104 // the GID of the file on the filesystem and is expected to return a GID to 105 // use within the tar file. It can also return an error if it is unable to 106 // choose a GID or the GID is not allowed. 107 GroupMappingFunc func(int) (int, error) 108 109 // ExcludeRootPath ensures the resulting tarball does not include 110 // a header entry for "./". This prevents untarring from modifying 111 // the parent directory. 112 ExcludeRootPath bool 113 114 // User provided control options. UserOption enum has the 115 // definitions and explanations for the various flags. 116 UserOptions UserOption 117 118 // CustomHandlers is used to allow the code calling tarhelper to inject custom 119 // logic for how to handle certain entries being written to the tar file. The 120 // Tar handler will loop over and call to these functions. They return a 121 // boolean which should be true when the built in logic for handling the file 122 // should be skipped. They also return an error which will cause the tar 123 // function to abort and bubble up the handler's error. The functions are 124 // passed the path where the entry are located on disk, the os.FileInfo for 125 // the file, and the *tar.Header entry for it. 126 CustomHandlers []TarCustomHandler 127 128 // PrefixHook executes before the file system is traversed and can be used to inject 129 // content into the archive which does not exist within the file system tree. This 130 // content will be extracted before any file system data. 131 PrefixHook TarCustomHook 132 133 // SuffixHook executes after the file system is traversed and like PrefixHook can be 134 // used to inject additional content into the archive. This content will be extracted 135 // after data from the file system. 136 SuffixHook TarCustomHook 137 } 138 139 // UserOption definitions. 140 const ( 141 c_DEREF UserOption = 1 << iota // Follow symbolic links when archiving. 142 ) 143 144 // Mode constants from the tar spec. 145 const ( 146 c_ISUID = 04000 // Set uid 147 c_ISGID = 02000 // Set gid 148 c_ISDIR = 040000 149 c_ISFIFO = 010000 150 c_ISREG = 0100000 151 c_ISLNK = 0120000 152 c_ISBLK = 060000 153 c_ISCHR = 020000 154 c_ISSOCK = 0140000 155 ) 156 157 // NewTar returns a Tar ready to write the contents of targetDir to w. 158 func NewTar(w io.Writer, targetDir string) *Tar { 159 return &Tar{ 160 target: targetDir, 161 dest: w, 162 hardLinks: make(map[uint64]string), 163 IncludePermissions: true, 164 IncludeOwners: false, 165 OwnerMappingFunc: defaultMappingFunc, 166 GroupMappingFunc: defaultMappingFunc, 167 } 168 } 169 170 func (t *Tar) Archive() error { 171 defer func() { 172 if t.archive != nil { 173 t.archive.Close() 174 t.archive = nil 175 } 176 }() 177 178 // Create a TarWriter that wraps the proper io.Writer object 179 // the implements the expected compression for this file. 180 switch t.Compression { 181 case NONE: 182 t.archive = tar.NewWriter(t.dest) 183 case GZIP: 184 dest := gzip.NewWriter(t.dest) 185 defer dest.Close() 186 t.archive = tar.NewWriter(dest) 187 case BZIP2: 188 return fmt.Errorf("bzip2 compression is not supported") 189 case DETECT: 190 return fmt.Errorf("not a valid compression type: %v", DETECT) 191 default: 192 return fmt.Errorf("unknown compression type: %v", t.Compression) 193 } 194 195 // ensure the target exists 196 f, err := os.Stat(t.target) 197 if err != nil { 198 return err 199 } 200 201 if t.PrefixHook != nil { 202 err = t.PrefixHook(t.archive) 203 if err != nil { 204 return err 205 } 206 } 207 208 // If the target is a file rather than a directory, adjust our initial entry 209 // name and target. It will still get just that directory, but want to ensure 210 // we don't tar a file named "." 211 startFullName := "." 212 if !f.IsDir() { 213 t.target = filepath.Dir(t.target) 214 startFullName = filepath.Join(".", f.Name()) 215 } 216 217 // walk the directory tree 218 if err := t.processEntry(startFullName, f, []string{}); err != nil { 219 return err 220 } 221 222 if t.SuffixHook != nil { 223 err = t.SuffixHook(t.archive) 224 if err != nil { 225 return err 226 } 227 } 228 229 return nil 230 } 231 232 // ExcludePath appends a path, file, or pattern relative to the toplevel path to 233 // be archived that is then excluded from the final archive. 234 // pathRE is a regex that will be anchored at the start and end then applied to 235 // the entire filename (full path and basename) 236 func (t *Tar) ExcludePath(pathRE string) { 237 if pathRE != "" { 238 re, err := regexp.Compile("^" + pathRE + "$") 239 if err != nil { 240 return 241 } 242 t.ignorePaths = append(t.ignorePaths, ignoreInfo{regexp: re, exclude: true, dirOnly: false}) 243 } 244 } 245 246 // IncludePath appends a path, file, or pattern relative to the toplevel path to 247 // be archived that is then excluded from the final archive. 248 // pathRE is a regex that will be anchored at the start and end then applied to 249 // the entire filename (full path and basename) 250 func (t *Tar) IncludePath(pathRE string) { 251 if pathRE != "" { 252 re, err := regexp.Compile("^" + pathRE + "$") 253 if err != nil { 254 return 255 } 256 t.ignorePaths = append(t.ignorePaths, ignoreInfo{regexp: re, exclude: false, dirOnly: false}) 257 } 258 } 259 260 // IncludeRegexp adds a Regexp into the list to consider when selectiong files 261 // to exclude. Files or directories matching the regexp will _not_ be excluded, 262 // even if they matched a previous Regexp. Files are only considered a match if 263 // they match the Regexp and isDir is false. 264 func (t *Tar) IncludeRegexp(re *regexp.Regexp, dirOnly bool) { 265 t.ignorePaths = append(t.ignorePaths, ignoreInfo{regexp: re, exclude: false, dirOnly: dirOnly}) 266 } 267 268 // ExcludeRegexp adds a Regexp into the list to consider when selectiong files 269 // to exclude. Files or directories matching the regexp will be excluded, even 270 // if they matched a previous Regexp from IncludeRegexp. Files are only 271 // considered a match if they match the Regexp and isDir is false. 272 func (t *Tar) ExcludeRegexp(re *regexp.Regexp, dirOnly bool) { 273 t.ignorePaths = append(t.ignorePaths, ignoreInfo{regexp: re, exclude: true, dirOnly: dirOnly}) 274 } 275 276 func (t *Tar) processDirectory(dir string, dirStack []string) error { 277 // get directory entries 278 files, err := ioutil.ReadDir(filepath.Join(t.target, dir)) 279 if err != nil { 280 return err 281 } 282 283 for _, f := range files { 284 fullName := filepath.Join(dir, f.Name()) 285 if err := t.processEntry(fullName, f, dirStack); err != nil { 286 return err 287 } 288 } 289 290 return nil 291 } 292 293 func (t *Tar) processEntry(fullName string, f os.FileInfo, dirStack []string) error { 294 var err error 295 296 // Exclude any files or paths specified by the user. 297 if t.shouldBeExcluded(fullName, f.IsDir()) { 298 return nil 299 } 300 301 // set base header parameters 302 header, err := tar.FileInfoHeader(f, "") 303 if err != nil { 304 return err 305 } 306 307 // Correct Windows paths so untar works in stager's container. 308 header.Name = path.Join(".", filepath.ToSlash(fullName)) 309 310 // handle VirtualPath 311 if t.VirtualPath != "" { 312 header.Name = path.Join(".", filepath.ToSlash(t.VirtualPath), header.Name) 313 } 314 315 // copy uid/gid if Permissions enabled 316 if t.IncludeOwners { 317 if header.Uid, err = t.OwnerMappingFunc(uidForFileInfo(f)); err != nil { 318 return fmt.Errorf("failed to map UID for %q: %v", header.Name, err) 319 } 320 if header.Gid, err = t.GroupMappingFunc(gidForFileInfo(f)); err != nil { 321 return fmt.Errorf("failed to map GID for %q: %v", header.Name, err) 322 } 323 } else { 324 header.Uid = 500 325 header.Gid = 500 326 } 327 328 // Check for any custom handlers that will process it. 329 for _, handler := range t.CustomHandlers { 330 bypass, err := handler(filepath.Join(t.target, fullName), f, header) 331 if err != nil { 332 return err 333 } 334 if bypass { 335 // write the header 336 err = t.archive.WriteHeader(header) 337 if err != nil { 338 return err 339 } 340 return nil 341 } 342 } 343 344 // Use built in handlers. 345 mode := f.Mode() 346 switch { 347 // directory handling 348 case f.IsDir(): 349 // if Permissions is not enabled, force mode back to 0755 350 if !t.IncludePermissions { 351 header.Mode = 0755 352 } 353 354 // update directory specific values, tarballs often append with a slash 355 header.Name = header.Name + "/" 356 357 // write the header 358 if !t.excludeRootPath(header.Name) { 359 err = t.archive.WriteHeader(header) 360 if err != nil { 361 return err 362 } 363 } 364 365 // Push the directory to stack 366 p, err := filepath.Abs(fullName) 367 if err != nil { 368 return fmt.Errorf("error getting absolute path for path %q, err='%v'\n", fullName, err) 369 } 370 371 // process the directory's entries next 372 if err = t.processDirectory(fullName, append(dirStack, p)); err != nil { 373 return err 374 } 375 376 // symlink handling 377 case mode&os.ModeSymlink == os.ModeSymlink: 378 // if Permissions is not enabled, force mode back to 0755 379 if !t.IncludePermissions { 380 header.Mode = 0755 381 } 382 383 // read and process the link 384 link, err := cleanLinkName(t.target, fullName) 385 if err != nil { 386 return err 387 } 388 389 if t.UserOptions&c_DEREF != 0 { 390 // Evaluate the path for the link. This will give us the 391 // complete absolute path with all symlinks resolved. 392 slink, err := filepath.EvalSymlinks(link) 393 if err != nil { 394 return fmt.Errorf("error evaluating symlink %q, err='%v'", link, err) 395 } 396 397 for _, elem := range dirStack { 398 if slink == elem { 399 // We don't want to abort if we detect a cycle. 400 // Let it continue without this path element. 401 return nil 402 } 403 } 404 405 // Ok we are not in a circular path. Proceed. 406 f, err := os.Stat(slink) 407 if err != nil { 408 return fmt.Errorf("error getting file stat for %q, err='%v'", slink, err) 409 } 410 411 if f.IsDir() { 412 // Write the header so that the symlinked directory contents appears 413 // under current dir. 414 header, err := tar.FileInfoHeader(f, "") 415 if err != nil { 416 return err 417 } 418 header.Name = "./" + fullName + "/" 419 420 // write the header 421 err = t.archive.WriteHeader(header) 422 if err != nil { 423 return err 424 } 425 426 return t.processDirectory(fullName, append(dirStack, slink)) 427 } else { 428 return t.processEntry(fullName, f, dirStack) 429 } 430 431 } else { 432 dir := filepath.Dir(fullName) 433 // If the link path contains the target path, then convert the link to be 434 // relative. This ensures it is properly preserved wherever it is later 435 // extracted. If it is a path outside the target, then preserve it as an 436 // absolute path. 437 if strings.Contains(link, t.target) { 438 // remove the targetdir to ensure the link is relative 439 link, err = filepath.Rel(filepath.Join(t.target, dir), link) 440 if err != nil { 441 return err 442 } 443 } 444 445 header.Linkname = link 446 // write the header 447 err = t.archive.WriteHeader(header) 448 if err != nil { 449 return err 450 } 451 452 } 453 454 // regular file handling 455 case mode&os.ModeType == 0: 456 // if Permissions is not enabled, force mode back to 0644 457 if !t.IncludePermissions { 458 header.Mode = 0644 459 } 460 461 // Necessary to ensure files from Windows have +x bit written. 462 chmodTarEntry(header) 463 464 // check to see if this is a hard link 465 if linkCountForFileInfo(f) > 1 { 466 inode := inodeForFileInfo(f) 467 if dst, ok := t.hardLinks[inode]; ok { 468 // update the header if it is 469 header.Typeflag = tar.TypeLink 470 header.Linkname = dst 471 header.Size = 0 472 } else { 473 // push it on the list, and continue to write it as a file 474 // this is our first time seeing it 475 t.hardLinks[inode] = header.Name 476 } 477 } 478 479 // write the header 480 err = t.archive.WriteHeader(header) 481 if err != nil { 482 return err 483 } 484 485 // only write the file if tye type is still a regular file 486 if header.Typeflag == tar.TypeReg { 487 // open the file and copy 488 data, err := os.Open(filepath.Join(t.target, fullName)) 489 if err != nil { 490 return err 491 } 492 _, err = io.Copy(t.archive, data) 493 if err != nil { 494 data.Close() 495 return err 496 } 497 498 // important to flush before the file is closed 499 err = t.archive.Flush() 500 if err != nil { 501 data.Close() 502 return err 503 } 504 // we want to ensure the file is closed in the loop 505 data.Close() 506 } 507 508 // device support 509 case mode&os.ModeDevice == os.ModeDevice || 510 mode&os.ModeCharDevice == os.ModeCharDevice: 511 // 512 // stat to get devmode 513 fi, err := os.Stat(filepath.Join(t.target, fullName)) 514 header.Devmajor, header.Devminor = osDeviceNumbersForFileInfo(fi) 515 516 // write the header 517 err = t.archive.WriteHeader(header) 518 if err != nil { 519 return err 520 } 521 522 // socket handling 523 case mode&os.ModeSocket == os.ModeSocket: 524 // skip... gnutar does, so we will 525 default: 526 } 527 528 return nil 529 } 530 531 func cleanLinkName(targetDir, name string) (string, error) { 532 dir := filepath.Dir(name) 533 534 // read the link 535 link, err := os.Readlink(filepath.Join(targetDir, name)) 536 if err != nil { 537 return "", err 538 } 539 540 // if the target isn't absolute, make it absolute 541 // even if it is absolute, we want to convert it to be relative 542 if !filepath.IsAbs(link) { 543 link, err = filepath.Abs(filepath.Join(targetDir, dir, link)) 544 if err != nil { 545 return "", err 546 } 547 } 548 549 // do a quick clean pass 550 link = filepath.Clean(link) 551 552 return link, nil 553 } 554 555 // shouldBeExcluded determines if supplied name is contained in the slice of 556 // files to exclude. ignorePaths are considered in order so that files excluded 557 // by one criteria can be reincluded by a later one. 558 func (t *Tar) shouldBeExcluded(name string, isDir bool) bool { 559 name = filepath.ToSlash(filepath.Clean(name)) 560 var exclude bool 561 for _, re := range t.ignorePaths { 562 if re.regexp.MatchString(name) || re.regexp.MatchString(filepath.Base(name)) { 563 if !re.dirOnly || (re.dirOnly && isDir) { 564 exclude = re.exclude 565 } 566 } 567 } 568 569 return exclude 570 } 571 572 // excludeRootPath determines if the path is the root path and should be 573 // excluded. 574 func (t *Tar) excludeRootPath(headerName string) bool { 575 if t.ExcludeRootPath && headerName == "./" { 576 return true 577 } 578 579 return false 580 }