github.com/bitrise-io/go-steputils/v2@v2.0.0-alpha.30/cache/compression/compression.go (about)

     1  package compression
     2  
     3  import (
     4  	"archive/tar"
     5  	"bytes"
     6  	"errors"
     7  	"fmt"
     8  	"io"
     9  	"io/fs"
    10  	"os"
    11  	"path/filepath"
    12  
    13  	"github.com/bitrise-io/go-utils/v2/command"
    14  	"github.com/bitrise-io/go-utils/v2/env"
    15  	"github.com/bitrise-io/go-utils/v2/log"
    16  	"github.com/klauspost/compress/zstd"
    17  )
    18  
// ArchiveDependencyChecker reports whether the external tools used for
// archiving are available. Archiver consults it to choose between the
// installed tar/zstd binaries and the pure-Go fallback.
//
//go:generate moq -stub -out archive_dependency_checker_mock.go . ArchiveDependencyChecker
type ArchiveDependencyChecker interface {
	// CheckDependencies returns true when the required tools are available.
	CheckDependencies() bool
}
    25  
// DependencyChecker is an ArchiveDependencyChecker that looks for the tar and
// zstd binaries by running `which` for each of them.
type DependencyChecker struct {
	// logger receives a debug line with the command run for each lookup.
	logger  log.Logger
	// envRepo is handed to the command factory that creates the lookup command.
	envRepo env.Repository
}
    31  
    32  // NewDependencyChecker ...
    33  func NewDependencyChecker(logger log.Logger, envRepo env.Repository) *DependencyChecker {
    34  	return &DependencyChecker{
    35  		logger:  logger,
    36  		envRepo: envRepo,
    37  	}
    38  }
    39  
    40  // CheckDependencies ...
    41  func (dc *DependencyChecker) CheckDependencies() bool {
    42  	return dc.checkDepdendency("tar") && dc.checkDepdendency("zstd")
    43  }
    44  
    45  func (dc *DependencyChecker) checkDepdendency(binaryName string) bool {
    46  	cmdFactory := command.NewFactory(dc.envRepo)
    47  	cmd := cmdFactory.Create("which", []string{binaryName}, nil)
    48  	dc.logger.Debugf("$ %s", cmd.PrintableCommandArgs())
    49  
    50  	_, err := cmd.RunAndReturnTrimmedCombinedOutput()
    51  	return err == nil
    52  }
    53  
// Archiver creates and extracts zstd-compressed tar archives. It uses the
// installed tar and zstd binaries when the dependency checker reports them as
// available and a pure-Go implementation otherwise.
type Archiver struct {
	// logger receives the info/debug messages about which implementation runs.
	logger                   log.Logger
	// envRepo is handed to the command factory that creates the tar commands.
	envRepo                  env.Repository
	// archiveDependencyChecker decides between the binary and the Go code path.
	archiveDependencyChecker ArchiveDependencyChecker
}
    60  
    61  // NewArchiver ...
    62  func NewArchiver(logger log.Logger, envRepo env.Repository, archiveDependencyChecker ArchiveDependencyChecker) *Archiver {
    63  	return &Archiver{
    64  		logger:                   logger,
    65  		envRepo:                  envRepo,
    66  		archiveDependencyChecker: archiveDependencyChecker,
    67  	}
    68  }
    69  
    70  // Compress creates a compressed archive from the provided files and folders using absolute paths.
    71  func (a *Archiver) Compress(archivePath string, includePaths []string) error {
    72  	haveZstdAndTar := a.archiveDependencyChecker.CheckDependencies()
    73  
    74  	if !haveZstdAndTar {
    75  		a.logger.Infof("Falling back to native implementation of zstd.")
    76  		if err := a.compressWithGoLib(archivePath, includePaths); err != nil {
    77  			return fmt.Errorf("compress files: %w", err)
    78  		}
    79  		return nil
    80  	}
    81  
    82  	a.logger.Infof("Using installed zstd binary")
    83  	if err := a.compressWithBinary(archivePath, includePaths); err != nil {
    84  		return fmt.Errorf("compress files: %w", err)
    85  	}
    86  	return nil
    87  }
    88  
    89  // Decompress takes an archive path and extracts files. This assumes an archive created with absolute file paths.
    90  func (a *Archiver) Decompress(archivePath string, destinationDirectory string) error {
    91  	haveZstdAndTar := a.archiveDependencyChecker.CheckDependencies()
    92  	if !haveZstdAndTar {
    93  		a.logger.Infof("Falling back to native implementation of zstd.")
    94  		if err := a.decompressWithGolib(archivePath, destinationDirectory); err != nil {
    95  			return fmt.Errorf("decompress files: %w", err)
    96  		}
    97  		return nil
    98  	}
    99  
   100  	a.logger.Infof("Using installed zstd binary")
   101  	if err := a.decompressWithBinary(archivePath, destinationDirectory); err != nil {
   102  		return fmt.Errorf("decompress files: %w", err)
   103  	}
   104  	return nil
   105  }
   106  
   107  func (a *Archiver) compressWithGoLib(archivePath string, includePaths []string) error {
   108  	var buf bytes.Buffer
   109  
   110  	for _, p := range includePaths {
   111  		zstdWriter, err := zstd.NewWriter(&buf)
   112  		if err != nil {
   113  			return fmt.Errorf("create zstd writer: %w", err)
   114  		}
   115  		tw := tar.NewWriter(zstdWriter)
   116  
   117  		path := filepath.Clean(p)
   118  		// walk through every file in the folder
   119  		if err := filepath.Walk(path, func(file string, fi os.FileInfo, e error) error {
   120  			// generate tar header
   121  			header, err := tar.FileInfoHeader(fi, file)
   122  			if err != nil {
   123  				return fmt.Errorf("create file info header: %w", err)
   124  			}
   125  
   126  			path := filepath.Clean(file)
   127  			header.Name = path
   128  
   129  			var link string
   130  			if fi.Mode()&os.ModeSymlink != 0 {
   131  				if link, err = os.Readlink(file); err != nil {
   132  					return fmt.Errorf("read symlink: %w", err)
   133  				}
   134  			}
   135  			if link != "" {
   136  				header.Typeflag = tar.TypeSymlink
   137  				header.Linkname = link
   138  			}
   139  
   140  			// write header
   141  			if err := tw.WriteHeader(header); err != nil {
   142  				return fmt.Errorf("write tar file header: %w", err)
   143  			}
   144  
   145  			// nothing more to do for non-regular files or directories
   146  			if !fi.Mode().IsRegular() || fi.IsDir() {
   147  				return nil
   148  			}
   149  
   150  			data, err := os.Open(file)
   151  			if err != nil {
   152  				return fmt.Errorf("open file: %w", err)
   153  			}
   154  			if _, err := io.Copy(tw, data); err != nil {
   155  				return fmt.Errorf("copy to file: %w", err)
   156  			}
   157  			if err := data.Close(); err != nil {
   158  				return fmt.Errorf("close file: %w", err)
   159  			}
   160  
   161  			return nil
   162  		}); err != nil {
   163  			return fmt.Errorf("iterate on files: %w", err)
   164  		}
   165  
   166  		// produce tar
   167  		if err := tw.Close(); err != nil {
   168  			return fmt.Errorf("close tar writer: %w", err)
   169  		}
   170  		// produce zstd
   171  		if err := zstdWriter.Close(); err != nil {
   172  			return fmt.Errorf("close zstd writer: %w", err)
   173  		}
   174  	}
   175  
   176  	// write the archive file
   177  	fileToWrite, err := os.OpenFile(archivePath, os.O_CREATE|os.O_RDWR, 0777)
   178  	if err != nil {
   179  		return fmt.Errorf("create archive file: %w", err)
   180  	}
   181  	if _, err := io.Copy(fileToWrite, &buf); err != nil {
   182  		return fmt.Errorf("write arhive file: %w", err)
   183  	}
   184  	if err := fileToWrite.Close(); err != nil {
   185  		return fmt.Errorf("close archive file: %w", err)
   186  	}
   187  
   188  	return nil
   189  }
   190  
   191  func (a *Archiver) compressWithBinary(archivePath string, includePaths []string) error {
   192  	cmdFactory := command.NewFactory(a.envRepo)
   193  
   194  	/*
   195  		tar arguments:
   196  		--use-compress-program: Pipe the output to zstd instead of using the built-in gzip compression
   197  		-P: Alias for --absolute-paths in BSD tar and --absolute-names in GNU tar (step runs on both Linux and macOS)
   198  			Storing absolute paths in the archive allows paths outside the current directory (such as ~/.gradle)
   199  		-c: Create archive
   200  		-f: Output file
   201  	*/
   202  	tarArgs := []string{
   203  		"--use-compress-program", "zstd --threads=0", // Use CPU count threads
   204  		"-P",
   205  		"-c",
   206  		"-f", archivePath,
   207  	}
   208  	tarArgs = append(tarArgs, includePaths...)
   209  
   210  	cmd := cmdFactory.Create("tar", tarArgs, nil)
   211  
   212  	a.logger.Debugf("$ %s", cmd.PrintableCommandArgs())
   213  
   214  	out, err := cmd.RunAndReturnTrimmedCombinedOutput()
   215  	if err != nil {
   216  		a.logger.Printf("Output: %s", out)
   217  		return err
   218  	}
   219  
   220  	return nil
   221  }
   222  
   223  func (a *Archiver) decompressWithGolib(archivePath string, destinationDirectory string) error {
   224  	compressedFile, err := os.OpenFile(archivePath, os.O_RDWR, 0777)
   225  	if err != nil {
   226  		return fmt.Errorf("read file %s: %w", archivePath, err)
   227  	}
   228  
   229  	zr, err := zstd.NewReader(compressedFile)
   230  	if err != nil {
   231  		return fmt.Errorf("create zstd reader: %w", err)
   232  	}
   233  
   234  	tr := tar.NewReader(zr)
   235  	for {
   236  		header, err := tr.Next()
   237  		if err == io.EOF {
   238  			break
   239  		}
   240  		if err != nil {
   241  			return fmt.Errorf("read tar file: %w", err)
   242  		}
   243  
   244  		target := filepath.ToSlash(header.Name)
   245  
   246  		if destinationDirectory != "" {
   247  			target = filepath.Join(destinationDirectory, target)
   248  		}
   249  
   250  		switch header.Typeflag {
   251  		// if its a dir and it doesn't exist create it (with 0755 permission)
   252  		case tar.TypeDir:
   253  			if _, err := os.Stat(target); err != nil {
   254  				if err := os.MkdirAll(target, 0755); err != nil {
   255  					return fmt.Errorf("create target directories: %w", err)
   256  				}
   257  			}
   258  		// if it's a file create it (with same permission)
   259  		case tar.TypeReg:
   260  			fileToWrite, err := os.OpenFile(target, os.O_CREATE|os.O_RDWR, os.FileMode(header.Mode))
   261  			if err != nil {
   262  				return fmt.Errorf("create file: %w", err)
   263  			}
   264  			// copy over contents
   265  			if _, err := io.Copy(fileToWrite, tr); err != nil {
   266  				return fmt.Errorf("copy content to file: %w", err)
   267  			}
   268  			// manually close here after each file operation; defering would cause each file close
   269  			// to wait until all operations have completed.
   270  			if err := fileToWrite.Close(); err != nil {
   271  				return fmt.Errorf("write file: %w", err)
   272  			}
   273  		case tar.TypeSymlink:
   274  			err = os.Symlink(header.Linkname, target)
   275  			if err != nil {
   276  				return fmt.Errorf("symlink file: %w", err)
   277  			}
   278  
   279  		}
   280  	}
   281  	return nil
   282  }
   283  
   284  func (a *Archiver) decompressWithBinary(archivePath string, destinationDirectory string) error {
   285  	commandFactory := command.NewFactory(a.envRepo)
   286  
   287  	/*
   288  		tar arguments:
   289  		--use-compress-program: Pipe the input to zstd instead of using the built-in gzip compression
   290  		-P: Alias for --absolute-paths in BSD tar and --absolute-names in GNU tar (step runs on both Linux and macOS)
   291  			Storing absolute paths in the archive allows paths outside the current directory (such as ~/.gradle)
   292  		-x: Extract archive
   293  		-f: Output file
   294  	*/
   295  	decompressTarArgs := []string{
   296  		"--use-compress-program", "zstd -d",
   297  		"-x",
   298  		"-f", archivePath,
   299  		"-P",
   300  	}
   301  
   302  	if destinationDirectory != "" {
   303  		decompressTarArgs = append(decompressTarArgs, "--directory", destinationDirectory)
   304  	}
   305  
   306  	cmd := commandFactory.Create("tar", decompressTarArgs, nil)
   307  	a.logger.Debugf("$ %s", cmd.PrintableCommandArgs())
   308  
   309  	out, err := cmd.RunAndReturnTrimmedCombinedOutput()
   310  	if err != nil {
   311  		a.logger.Printf("Output: %s", out)
   312  		return err
   313  	}
   314  
   315  	return nil
   316  }
   317  
   318  // AreAllPathsEmpty checks if the provided paths are all nonexistent files or empty directories
   319  func AreAllPathsEmpty(includePaths []string) bool {
   320  	allEmpty := true
   321  
   322  	for _, path := range includePaths {
   323  		// Check if file exists at path
   324  		fileInfo, err := os.Stat(path)
   325  		if errors.Is(err, fs.ErrNotExist) {
   326  			// File doesn't exist
   327  			continue
   328  		}
   329  
   330  		// Check if it's a directory
   331  		if !fileInfo.IsDir() {
   332  			// Is a file and it exists
   333  			allEmpty = false
   334  			break
   335  		}
   336  
   337  		file, err := os.Open(path)
   338  		if err != nil {
   339  			continue
   340  		}
   341  		_, err = file.Readdirnames(1) // query only 1 child
   342  		if errors.Is(err, io.EOF) {
   343  			// Dir is empty
   344  			continue
   345  		}
   346  		if err == nil {
   347  			// Dir has files or dirs
   348  			allEmpty = false
   349  			break
   350  		}
   351  	}
   352  
   353  	return allEmpty
   354  }