code-intelligence.com/cifuzz@v0.40.0/internal/bundler/archive/archive.go (about)

     1  package archive
     2  
     3  import (
     4  	"archive/tar"
     5  	"compress/gzip"
     6  	"io"
     7  	"io/fs"
     8  	"os"
     9  	"path/filepath"
    10  
    11  	"github.com/pkg/errors"
    12  
    13  	"code-intelligence.com/cifuzz/pkg/log"
    14  	"code-intelligence.com/cifuzz/util/archiveutil"
    15  	"code-intelligence.com/cifuzz/util/fileutil"
    16  )
    17  
    18  type ArchiveWriter interface {
    19  	Close() error
    20  	WriteFile(string, string) error
    21  	WriteDir(string, string) error
    22  	WriteHardLink(string, string) error
    23  	GetSourcePath(string) string
    24  	HasFileEntry(string) bool
    25  }
    26  
    27  type NullArchiveWriter struct{}
    28  
    29  func (w *NullArchiveWriter) Close() error {
    30  	return nil
    31  }
    32  func (w *NullArchiveWriter) WriteFile(string, string) error {
    33  	return nil
    34  }
    35  func (w *NullArchiveWriter) WriteDir(string, string) error {
    36  	return nil
    37  }
    38  func (w *NullArchiveWriter) WriteHardLink(string, string) error {
    39  	return nil
    40  }
    41  func (w *NullArchiveWriter) GetSourcePath(string) string {
    42  	return ""
    43  }
    44  func (w *NullArchiveWriter) HasFileEntry(string) bool {
    45  	return true
    46  }
    47  
    48  // TarArchiveWriter provides functions to create a gzip-compressed tar archive.
    49  type TarArchiveWriter struct {
    50  	*tar.Writer
    51  	manifest   map[string]string
    52  	gzipWriter *gzip.Writer
    53  }
    54  
    55  func NewTarArchiveWriter(w io.Writer, compress bool) *TarArchiveWriter {
    56  	var gzipWriter *gzip.Writer
    57  	var writer *tar.Writer
    58  
    59  	if compress {
    60  		gzipWriter = gzip.NewWriter(w)
    61  		writer = tar.NewWriter(gzipWriter)
    62  	} else {
    63  		writer = tar.NewWriter(w)
    64  	}
    65  
    66  	return &TarArchiveWriter{
    67  		Writer:     writer,
    68  		manifest:   make(map[string]string),
    69  		gzipWriter: gzipWriter,
    70  	}
    71  }
    72  
    73  // Close closes the tar writer and the gzip writer. It does not close
    74  // the underlying io.Writer.
    75  func (w *TarArchiveWriter) Close() error {
    76  	var err error
    77  	err = w.Writer.Close()
    78  	if err != nil {
    79  		return errors.WithStack(err)
    80  	}
    81  
    82  	if w.gzipWriter != nil {
    83  		err = w.gzipWriter.Close()
    84  	}
    85  
    86  	if err != nil {
    87  		return errors.WithStack(err)
    88  	}
    89  	return nil
    90  }
    91  
    92  // WriteFile writes the contents of sourcePath to the archive, with the
    93  // filename archivePath (so when the archive is extracted, the file will
    94  // be created at archivePath). Symlinks will be followed.
    95  // WriteFile only handles regular files and symlinks.
    96  func (w *TarArchiveWriter) WriteFile(archivePath string, sourcePath string) error {
    97  	if fileutil.IsDir(sourcePath) {
    98  		return errors.Errorf("file is a directory: %s", sourcePath)
    99  	}
   100  	return w.writeFileOrEmptyDir(archivePath, sourcePath)
   101  }
   102  
   103  // writeFileOrEmptyDir does the same as WriteFile but doesn't return an
   104  // error when passed a directory. If passed a directory, it creates an
   105  // empty directory at archivePath.
   106  func (w *TarArchiveWriter) writeFileOrEmptyDir(archivePath string, sourcePath string) error {
   107  	// To match the tar specification, which requires forward slashes as path separators,
   108  	// we convert potential windows path separators to forward slashes.
   109  	// Otherwise tars created on Windows will not work correctly on other platforms.
   110  	archivePath = filepath.ToSlash(archivePath)
   111  	existingAbsPath, conflict := w.manifest[archivePath]
   112  	if conflict {
   113  		if existingAbsPath == sourcePath {
   114  			log.Debugf("Skipping file %q, was already added to the archive", sourcePath)
   115  			return nil
   116  		} else {
   117  			return errors.Errorf("archive path %q has two source files: %q and %q", archivePath, existingAbsPath, sourcePath)
   118  		}
   119  	}
   120  
   121  	f, err := os.Open(sourcePath)
   122  	if err != nil {
   123  		return errors.WithStack(err)
   124  	}
   125  	defer f.Close()
   126  
   127  	info, err := f.Stat()
   128  	if err != nil {
   129  		return errors.WithStack(err)
   130  	}
   131  
   132  	// Since os.File.Stat() follows symlinks, info will not be of type symlink
   133  	// at this point - no need to pass in a non-empty value for link.
   134  	header, err := tar.FileInfoHeader(info, "")
   135  	if err != nil {
   136  		return errors.WithStack(err)
   137  	}
   138  	header.Name = archivePath
   139  	err = w.WriteHeader(header)
   140  	if err != nil {
   141  		return errors.WithStack(err)
   142  	}
   143  
   144  	if info.IsDir() {
   145  		return nil
   146  	}
   147  	if !info.Mode().IsRegular() {
   148  		return errors.Errorf("not a regular file: %s", sourcePath)
   149  	}
   150  
   151  	_, err = io.Copy(w.Writer, f)
   152  	if err != nil {
   153  		return errors.Wrapf(err, "failed to add file to archive: %s", sourcePath)
   154  	}
   155  
   156  	w.manifest[archivePath] = sourcePath
   157  	return nil
   158  }
   159  
   160  // WriteHardLink adds a hard link header to the archive. When the
   161  // archive is extracted, a hard link to target with the name linkname is
   162  // created.
   163  func (w *TarArchiveWriter) WriteHardLink(target string, linkname string) error {
   164  	existingAbsPath, conflict := w.manifest[linkname]
   165  	if conflict {
   166  		return errors.Errorf("conflict for archive path %q: %q and %q", target, existingAbsPath, linkname)
   167  	}
   168  
   169  	header := &tar.Header{
   170  		Typeflag: tar.TypeLink,
   171  		Name:     linkname,
   172  		Linkname: target,
   173  	}
   174  	err := w.WriteHeader(header)
   175  	if err != nil {
   176  		return errors.WithStack(err)
   177  	}
   178  	w.manifest[target] = linkname
   179  	return nil
   180  }
   181  
   182  // WriteDir traverses sourceDir recursively and writes all regular files
   183  // and symlinks to the archive.
   184  func (w *TarArchiveWriter) WriteDir(archiveBasePath string, sourceDir string) error {
   185  	return filepath.WalkDir(sourceDir, func(path string, d fs.DirEntry, err error) error {
   186  		if err != nil {
   187  			return err
   188  		}
   189  
   190  		relPath, err := filepath.Rel(sourceDir, path)
   191  		if err != nil {
   192  			return errors.WithStack(err)
   193  		}
   194  		archivePath := filepath.Join(archiveBasePath, relPath)
   195  
   196  		// skip self referencing directories
   197  		if relPath == "." && archivePath == "." {
   198  			return nil
   199  		}
   200  
   201  		// There is no harm in creating tar entries for empty directories, even though they are not necessary.
   202  		return w.writeFileOrEmptyDir(archivePath, path)
   203  	})
   204  }
   205  
   206  func (w *TarArchiveWriter) GetSourcePath(archivePath string) string {
   207  	return w.manifest[archivePath]
   208  }
   209  
   210  func (w *TarArchiveWriter) HasFileEntry(archivePath string) bool {
   211  	_, exists := w.manifest[archivePath]
   212  	return exists
   213  }
   214  
   215  // Extract extracts the gzip-compressed tar archive bundle into dir.
   216  func Extract(bundle, dir string) error {
   217  	f, err := os.Open(bundle)
   218  	if err != nil {
   219  		return errors.WithStack(err)
   220  	}
   221  	gr, err := gzip.NewReader(f)
   222  	if err != nil {
   223  		return errors.WithStack(err)
   224  	}
   225  	defer gr.Close()
   226  	return archiveutil.Untar(gr, dir)
   227  }