github.com/apcera/util@v0.0.0-20180322191801-7a50bc84ee48/tarhelper/tar.go (about)

     1  // Copyright 2012-2016 Apcera Inc. All rights reserved.
     2  
     3  package tarhelper
     4  
     5  import (
     6  	"archive/tar"
     7  	"compress/gzip"
     8  	"fmt"
     9  	"io"
    10  	"io/ioutil"
    11  	"os"
    12  	"path"
    13  	"path/filepath"
    14  	"regexp"
    15  	"strings"
    16  )
    17  
// UserOption is a bit set of user-supplied control options for archiving.
// The individual flags are declared in the UserOption const block of this
// file and are tested with a bitwise AND against Tar.UserOptions.
type UserOption int
    21  
// DirStack tracks circular symbolic links for the dereference archive option.
// It is declared as its own type to highlight the semantics.
// NOTE(review): the walking methods in this file accept a plain []string for
// the stack rather than DirStack.
type DirStack []string
    25  
// ignoreInfo expands the idea of excluding a path by also specifying metadata
// about the regexp and how to process a match. Rules are stored in order in
// Tar.ignorePaths.
type ignoreInfo struct {
	// regexp is the regular expression responsible for deciding matches.
	regexp *regexp.Regexp

	// exclude specifies whether the matched file should be excluded (true) or
	// included (false). This allows subsequent matches to reinclude files
	// previously excluded.
	exclude bool

	// dirOnly will consider the regexp a match only if the entry is also a
	// directory.
	dirOnly bool
}
    40  
// TarCustomHandler is used to inject custom behavior for handling file entries
// going into a tar file. It receives the entry's path on disk, its os.FileInfo
// and the *tar.Header about to be written, and returns true when the built-in
// handling of the entry should be skipped. For more information, see the
// Tar.CustomHandlers description.
type TarCustomHandler func(fullpath string, fi os.FileInfo, header *tar.Header) (bool, error)
    45  
// TarCustomHook can inject additional header and file data into the archive.
// It is handed the *tar.Writer the archive is being written to; a non-nil
// error aborts archiving. For more information, see Tar.PrefixHook and
// Tar.SuffixHook.
type TarCustomHook func(archive *tar.Writer) error
    49  
// Tar manages state for creating a TAR archive of a file or directory tree.
type Tar struct {
	// target is the path on disk (directory or single file) to be archived.
	target string

	// The destination writer
	dest io.Writer

	// The archive/tar writer that every entry is written to. It is created
	// when Archive() is called and reset to nil when Archive() returns.
	archive *tar.Writer

	// The Compression being used in this tar.
	Compression Compression

	// Set to true if archiving should attempt to preserve
	// permissions as it was on the filesystem. If this is false then
	// files will be archived with basic file/directory permissions.
	IncludePermissions bool

	// Set to true to preserve ownership of files and directories. If set to
	// false, the Uid and Gid will be set as 500, which is the first Uid/Gid
	// reserved for normal users.
	IncludeOwners bool

	// ignorePaths contains the ordered include/exclude rules a user has
	// registered. Rules are evaluated in order and the last matching rule
	// decides whether an entry is left out of the tar.
	ignorePaths []ignoreInfo

	// If set, this will be a virtual path that is prepended to the
	// file location.  This allows the target to be under a temp directory
	// but have it packaged as though it was under another directory, such as
	// taring /tmp/build, and having
	//   /tmp/build/bin/foo be /var/lib/build/bin/foo
	// in the tar archive.
	VirtualPath string

	// This is used to track potential hard links. We check the number of links
	// and record the inode here (mapped to the entry name it was first
	// archived under) to see if we run across the inode again later.
	hardLinks map[uint64]string

	// OwnerMappingFunc is used to give the caller the ability to control the
	// mapping of UIDs on the filesystem into what they should be in the tar.
	// The function is only used when IncludeOwners is true. The function is
	// passed in the UID of the file on the filesystem and is expected to
	// return a UID to use within the tar file. It can also return an error if
	// it is unable to choose a UID or the UID is not allowed.
	OwnerMappingFunc func(int) (int, error)

	// GroupMappingFunc is used to give the caller the ability to control the
	// mapping of GIDs on the filesystem into what they should be in the tar.
	// The function is only used when IncludeOwners is true. The function is
	// passed in the GID of the file on the filesystem and is expected to
	// return a GID to use within the tar file. It can also return an error if
	// it is unable to choose a GID or the GID is not allowed.
	GroupMappingFunc func(int) (int, error)

	// ExcludeRootPath ensures the resulting tarball does not include
	// a header entry for "./". This prevents untarring from modifying
	// the parent directory.
	ExcludeRootPath bool

	// User provided control options. UserOption enum has the
	// definitions and explanations for the various flags.
	UserOptions UserOption

	// CustomHandlers is used to allow the code calling tarhelper to inject custom
	// logic for how to handle certain entries being written to the tar file. The
	// Tar handler will loop over and call to these functions. They return a
	// boolean which should be true when the built in logic for handling the file
	// should be skipped; in that case only the header is written for the entry.
	// They also return an error which will cause the tar function to abort and
	// bubble up the handler's error. The functions are passed the path where
	// the entry is located on disk, the os.FileInfo for the file, and the
	// *tar.Header entry for it.
	CustomHandlers []TarCustomHandler

	// PrefixHook executes before the file system is traversed and can be used to inject
	// content into the archive which does not exist within the file system tree. This
	// content will be extracted before any file system data.
	PrefixHook TarCustomHook

	// SuffixHook executes after the file system is traversed and like PrefixHook can be
	// used to inject additional content into the archive. This content will be extracted
	// after data from the file system.
	SuffixHook TarCustomHook
}
   138  
// UserOption definitions. Each flag occupies its own bit so that flags can be
// combined in Tar.UserOptions.
const (
	c_DEREF UserOption = 1 << iota // Follow symbolic links when archiving.
)
   143  
// Mode constants from the tar spec (octal file-mode bits).
const (
	c_ISUID  = 04000   // Set uid
	c_ISGID  = 02000   // Set gid
	c_ISDIR  = 040000  // Directory
	c_ISFIFO = 010000  // FIFO
	c_ISREG  = 0100000 // Regular file
	c_ISLNK  = 0120000 // Symbolic link
	c_ISBLK  = 060000  // Block special file
	c_ISCHR  = 020000  // Character special file
	c_ISSOCK = 0140000 // Socket
)
   156  
   157  // NewTar returns a Tar ready to write the contents of targetDir to w.
   158  func NewTar(w io.Writer, targetDir string) *Tar {
   159  	return &Tar{
   160  		target:             targetDir,
   161  		dest:               w,
   162  		hardLinks:          make(map[uint64]string),
   163  		IncludePermissions: true,
   164  		IncludeOwners:      false,
   165  		OwnerMappingFunc:   defaultMappingFunc,
   166  		GroupMappingFunc:   defaultMappingFunc,
   167  	}
   168  }
   169  
   170  func (t *Tar) Archive() error {
   171  	defer func() {
   172  		if t.archive != nil {
   173  			t.archive.Close()
   174  			t.archive = nil
   175  		}
   176  	}()
   177  
   178  	// Create a TarWriter that wraps the proper io.Writer object
   179  	// the implements the expected compression for this file.
   180  	switch t.Compression {
   181  	case NONE:
   182  		t.archive = tar.NewWriter(t.dest)
   183  	case GZIP:
   184  		dest := gzip.NewWriter(t.dest)
   185  		defer dest.Close()
   186  		t.archive = tar.NewWriter(dest)
   187  	case BZIP2:
   188  		return fmt.Errorf("bzip2 compression is not supported")
   189  	case DETECT:
   190  		return fmt.Errorf("not a valid compression type: %v", DETECT)
   191  	default:
   192  		return fmt.Errorf("unknown compression type: %v", t.Compression)
   193  	}
   194  
   195  	// ensure the target exists
   196  	f, err := os.Stat(t.target)
   197  	if err != nil {
   198  		return err
   199  	}
   200  
   201  	if t.PrefixHook != nil {
   202  		err = t.PrefixHook(t.archive)
   203  		if err != nil {
   204  			return err
   205  		}
   206  	}
   207  
   208  	// If the target is a file rather than a directory, adjust our initial entry
   209  	// name and target. It will still get just that directory, but want to ensure
   210  	// we don't tar a file named "."
   211  	startFullName := "."
   212  	if !f.IsDir() {
   213  		t.target = filepath.Dir(t.target)
   214  		startFullName = filepath.Join(".", f.Name())
   215  	}
   216  
   217  	// walk the directory tree
   218  	if err := t.processEntry(startFullName, f, []string{}); err != nil {
   219  		return err
   220  	}
   221  
   222  	if t.SuffixHook != nil {
   223  		err = t.SuffixHook(t.archive)
   224  		if err != nil {
   225  			return err
   226  		}
   227  	}
   228  
   229  	return nil
   230  }
   231  
   232  // ExcludePath appends a path, file, or pattern relative to the toplevel path to
   233  // be archived that is then excluded from the final archive.
   234  // pathRE is a regex that will be anchored at the start and end then applied to
   235  // the entire filename (full path and basename)
   236  func (t *Tar) ExcludePath(pathRE string) {
   237  	if pathRE != "" {
   238  		re, err := regexp.Compile("^" + pathRE + "$")
   239  		if err != nil {
   240  			return
   241  		}
   242  		t.ignorePaths = append(t.ignorePaths, ignoreInfo{regexp: re, exclude: true, dirOnly: false})
   243  	}
   244  }
   245  
   246  // IncludePath appends a path, file, or pattern relative to the toplevel path to
   247  // be archived that is then excluded from the final archive.
   248  // pathRE is a regex that will be anchored at the start and end then applied to
   249  // the entire filename (full path and basename)
   250  func (t *Tar) IncludePath(pathRE string) {
   251  	if pathRE != "" {
   252  		re, err := regexp.Compile("^" + pathRE + "$")
   253  		if err != nil {
   254  			return
   255  		}
   256  		t.ignorePaths = append(t.ignorePaths, ignoreInfo{regexp: re, exclude: false, dirOnly: false})
   257  	}
   258  }
   259  
   260  // IncludeRegexp adds a Regexp into the list to consider when selectiong files
   261  // to exclude. Files or directories matching the regexp will _not_ be excluded,
   262  // even if they matched a previous Regexp. Files are only considered a match if
   263  // they match the Regexp and isDir is false.
   264  func (t *Tar) IncludeRegexp(re *regexp.Regexp, dirOnly bool) {
   265  	t.ignorePaths = append(t.ignorePaths, ignoreInfo{regexp: re, exclude: false, dirOnly: dirOnly})
   266  }
   267  
   268  // ExcludeRegexp adds a Regexp into the list to consider when selectiong files
   269  // to exclude. Files or directories matching the regexp will be excluded, even
   270  // if they matched a previous Regexp from IncludeRegexp. Files are only
   271  // considered a match if they match the Regexp and isDir is false.
   272  func (t *Tar) ExcludeRegexp(re *regexp.Regexp, dirOnly bool) {
   273  	t.ignorePaths = append(t.ignorePaths, ignoreInfo{regexp: re, exclude: true, dirOnly: dirOnly})
   274  }
   275  
   276  func (t *Tar) processDirectory(dir string, dirStack []string) error {
   277  	// get directory entries
   278  	files, err := ioutil.ReadDir(filepath.Join(t.target, dir))
   279  	if err != nil {
   280  		return err
   281  	}
   282  
   283  	for _, f := range files {
   284  		fullName := filepath.Join(dir, f.Name())
   285  		if err := t.processEntry(fullName, f, dirStack); err != nil {
   286  			return err
   287  		}
   288  	}
   289  
   290  	return nil
   291  }
   292  
   293  func (t *Tar) processEntry(fullName string, f os.FileInfo, dirStack []string) error {
   294  	var err error
   295  
   296  	// Exclude any files or paths specified by the user.
   297  	if t.shouldBeExcluded(fullName, f.IsDir()) {
   298  		return nil
   299  	}
   300  
   301  	// set base header parameters
   302  	header, err := tar.FileInfoHeader(f, "")
   303  	if err != nil {
   304  		return err
   305  	}
   306  
   307  	// Correct Windows paths so untar works in stager's container.
   308  	header.Name = path.Join(".", filepath.ToSlash(fullName))
   309  
   310  	// handle VirtualPath
   311  	if t.VirtualPath != "" {
   312  		header.Name = path.Join(".", filepath.ToSlash(t.VirtualPath), header.Name)
   313  	}
   314  
   315  	// copy uid/gid if Permissions enabled
   316  	if t.IncludeOwners {
   317  		if header.Uid, err = t.OwnerMappingFunc(uidForFileInfo(f)); err != nil {
   318  			return fmt.Errorf("failed to map UID for %q: %v", header.Name, err)
   319  		}
   320  		if header.Gid, err = t.GroupMappingFunc(gidForFileInfo(f)); err != nil {
   321  			return fmt.Errorf("failed to map GID for %q: %v", header.Name, err)
   322  		}
   323  	} else {
   324  		header.Uid = 500
   325  		header.Gid = 500
   326  	}
   327  
   328  	// Check for any custom handlers that will process it.
   329  	for _, handler := range t.CustomHandlers {
   330  		bypass, err := handler(filepath.Join(t.target, fullName), f, header)
   331  		if err != nil {
   332  			return err
   333  		}
   334  		if bypass {
   335  			// write the header
   336  			err = t.archive.WriteHeader(header)
   337  			if err != nil {
   338  				return err
   339  			}
   340  			return nil
   341  		}
   342  	}
   343  
   344  	// Use built in handlers.
   345  	mode := f.Mode()
   346  	switch {
   347  	// directory handling
   348  	case f.IsDir():
   349  		// if Permissions is not enabled, force mode back to 0755
   350  		if !t.IncludePermissions {
   351  			header.Mode = 0755
   352  		}
   353  
   354  		// update directory specific values, tarballs often append with a slash
   355  		header.Name = header.Name + "/"
   356  
   357  		// write the header
   358  		if !t.excludeRootPath(header.Name) {
   359  			err = t.archive.WriteHeader(header)
   360  			if err != nil {
   361  				return err
   362  			}
   363  		}
   364  
   365  		// Push the directory to stack
   366  		p, err := filepath.Abs(fullName)
   367  		if err != nil {
   368  			return fmt.Errorf("error getting absolute path for path %q, err='%v'\n", fullName, err)
   369  		}
   370  
   371  		// process the directory's entries next
   372  		if err = t.processDirectory(fullName, append(dirStack, p)); err != nil {
   373  			return err
   374  		}
   375  
   376  	// symlink handling
   377  	case mode&os.ModeSymlink == os.ModeSymlink:
   378  		// if Permissions is not enabled, force mode back to 0755
   379  		if !t.IncludePermissions {
   380  			header.Mode = 0755
   381  		}
   382  
   383  		// read and process the link
   384  		link, err := cleanLinkName(t.target, fullName)
   385  		if err != nil {
   386  			return err
   387  		}
   388  
   389  		if t.UserOptions&c_DEREF != 0 {
   390  			// Evaluate the path for the link. This will give us the
   391  			// complete absolute path with all symlinks resolved.
   392  			slink, err := filepath.EvalSymlinks(link)
   393  			if err != nil {
   394  				return fmt.Errorf("error evaluating symlink %q, err='%v'", link, err)
   395  			}
   396  
   397  			for _, elem := range dirStack {
   398  				if slink == elem {
   399  					// We don't want to abort if we detect a cycle.
   400  					// Let it continue  without this path element.
   401  					return nil
   402  				}
   403  			}
   404  
   405  			// Ok we are not in a circular path. Proceed.
   406  			f, err := os.Stat(slink)
   407  			if err != nil {
   408  				return fmt.Errorf("error getting file stat for %q, err='%v'", slink, err)
   409  			}
   410  
   411  			if f.IsDir() {
   412  				// Write the header so that the symlinked directory contents appears
   413  				// under current dir.
   414  				header, err := tar.FileInfoHeader(f, "")
   415  				if err != nil {
   416  					return err
   417  				}
   418  				header.Name = "./" + fullName + "/"
   419  
   420  				// write the header
   421  				err = t.archive.WriteHeader(header)
   422  				if err != nil {
   423  					return err
   424  				}
   425  
   426  				return t.processDirectory(fullName, append(dirStack, slink))
   427  			} else {
   428  				return t.processEntry(fullName, f, dirStack)
   429  			}
   430  
   431  		} else {
   432  			dir := filepath.Dir(fullName)
   433  			// If the link path contains the target path, then convert the link to be
   434  			// relative. This ensures it is properly preserved wherever it is later
   435  			// extracted. If it is a path outside the target, then preserve it as an
   436  			// absolute path.
   437  			if strings.Contains(link, t.target) {
   438  				// remove the targetdir to ensure the link is relative
   439  				link, err = filepath.Rel(filepath.Join(t.target, dir), link)
   440  				if err != nil {
   441  					return err
   442  				}
   443  			}
   444  
   445  			header.Linkname = link
   446  			// write the header
   447  			err = t.archive.WriteHeader(header)
   448  			if err != nil {
   449  				return err
   450  			}
   451  
   452  		}
   453  
   454  	// regular file handling
   455  	case mode&os.ModeType == 0:
   456  		// if Permissions is not enabled, force mode back to 0644
   457  		if !t.IncludePermissions {
   458  			header.Mode = 0644
   459  		}
   460  
   461  		// Necessary to ensure files from Windows have +x bit written.
   462  		chmodTarEntry(header)
   463  
   464  		// check to see if this is a hard link
   465  		if linkCountForFileInfo(f) > 1 {
   466  			inode := inodeForFileInfo(f)
   467  			if dst, ok := t.hardLinks[inode]; ok {
   468  				// update the header if it is
   469  				header.Typeflag = tar.TypeLink
   470  				header.Linkname = dst
   471  				header.Size = 0
   472  			} else {
   473  				// push it on the list, and continue to write it as a file
   474  				// this is our first time seeing it
   475  				t.hardLinks[inode] = header.Name
   476  			}
   477  		}
   478  
   479  		// write the header
   480  		err = t.archive.WriteHeader(header)
   481  		if err != nil {
   482  			return err
   483  		}
   484  
   485  		// only write the file if tye type is still a regular file
   486  		if header.Typeflag == tar.TypeReg {
   487  			// open the file and copy
   488  			data, err := os.Open(filepath.Join(t.target, fullName))
   489  			if err != nil {
   490  				return err
   491  			}
   492  			_, err = io.Copy(t.archive, data)
   493  			if err != nil {
   494  				data.Close()
   495  				return err
   496  			}
   497  
   498  			// important to flush before the file is closed
   499  			err = t.archive.Flush()
   500  			if err != nil {
   501  				data.Close()
   502  				return err
   503  			}
   504  			// we want to ensure the file is closed in the loop
   505  			data.Close()
   506  		}
   507  
   508  	// device support
   509  	case mode&os.ModeDevice == os.ModeDevice ||
   510  		mode&os.ModeCharDevice == os.ModeCharDevice:
   511  		//
   512  		// stat to get devmode
   513  		fi, err := os.Stat(filepath.Join(t.target, fullName))
   514  		header.Devmajor, header.Devminor = osDeviceNumbersForFileInfo(fi)
   515  
   516  		// write the header
   517  		err = t.archive.WriteHeader(header)
   518  		if err != nil {
   519  			return err
   520  		}
   521  
   522  	// socket handling
   523  	case mode&os.ModeSocket == os.ModeSocket:
   524  		// skip... gnutar does, so we will
   525  	default:
   526  	}
   527  
   528  	return nil
   529  }
   530  
   531  func cleanLinkName(targetDir, name string) (string, error) {
   532  	dir := filepath.Dir(name)
   533  
   534  	// read the link
   535  	link, err := os.Readlink(filepath.Join(targetDir, name))
   536  	if err != nil {
   537  		return "", err
   538  	}
   539  
   540  	// if the target isn't absolute, make it absolute
   541  	// even if it is absolute, we want to convert it to be relative
   542  	if !filepath.IsAbs(link) {
   543  		link, err = filepath.Abs(filepath.Join(targetDir, dir, link))
   544  		if err != nil {
   545  			return "", err
   546  		}
   547  	}
   548  
   549  	// do a quick clean pass
   550  	link = filepath.Clean(link)
   551  
   552  	return link, nil
   553  }
   554  
   555  // shouldBeExcluded determines if supplied name is contained in the slice of
   556  // files to exclude. ignorePaths are considered in order so that files excluded
   557  // by one criteria can be reincluded by a later one.
   558  func (t *Tar) shouldBeExcluded(name string, isDir bool) bool {
   559  	name = filepath.ToSlash(filepath.Clean(name))
   560  	var exclude bool
   561  	for _, re := range t.ignorePaths {
   562  		if re.regexp.MatchString(name) || re.regexp.MatchString(filepath.Base(name)) {
   563  			if !re.dirOnly || (re.dirOnly && isDir) {
   564  				exclude = re.exclude
   565  			}
   566  		}
   567  	}
   568  
   569  	return exclude
   570  }
   571  
   572  // excludeRootPath determines if the path is the root path and should be
   573  // excluded.
   574  func (t *Tar) excludeRootPath(headerName string) bool {
   575  	if t.ExcludeRootPath && headerName == "./" {
   576  		return true
   577  	}
   578  
   579  	return false
   580  }