github.com/filecoin-project/bacalhau@v0.3.23-0.20230228154132-45c989550ace/pkg/util/targzip/targzip.go (about)

     1  package targzip
     2  
     3  import (
     4  	"archive/tar"
     5  	"compress/gzip"
     6  	"context"
     7  	"fmt"
     8  	"io"
     9  	"io/fs"
    10  	"os"
    11  	"path/filepath"
    12  	"strings"
    13  
    14  	"github.com/c2h5oh/datasize"
    15  	"github.com/filecoin-project/bacalhau/pkg/system"
    16  	"github.com/filecoin-project/bacalhau/pkg/util/closer"
    17  )
    18  
const (
	// MaximumContextSize is the limit that Compress and Decompress hand to
	// their internal helpers, which compare it against the size of each
	// individual file rather than against the archive as a whole.
	MaximumContextSize            datasize.ByteSize = 10 * datasize.MB
	worldReadOwnerWritePermission fs.FileMode       = 0755 // rwxr-xr-x; used for directories created while decompressing
)
    23  
// Compress writes a gzip-compressed tar archive of src to buf.
//
// src is handed to compress together with MaximumContextSize, so src must be
// a regular file or a directory, and an error is returned if any single file
// is larger than MaximumContextSize.
func Compress(ctx context.Context, src string, buf io.Writer) error {
	return compress(ctx, src, buf, MaximumContextSize)
}
    27  
// Decompress extracts the gzip-compressed tar stream src into the directory
// dst.
//
// The work is delegated to decompress with MaximumContextSize as the limit:
// dst is created with os.Mkdir (so creating it must succeed), and an error is
// returned if any regular file entry is larger than MaximumContextSize.
func Decompress(src io.Reader, dst string) error {
	return decompress(src, dst, MaximumContextSize)
}
    31  
    32  func UncompressedSize(src io.Reader) (datasize.ByteSize, error) {
    33  	var size datasize.ByteSize
    34  	zr, err := gzip.NewReader(src)
    35  	if err != nil {
    36  		return 0, err
    37  	}
    38  	tr := tar.NewReader(zr)
    39  
    40  	var header *tar.Header
    41  	for header, err = tr.Next(); err == nil; header, err = tr.Next() {
    42  		size += datasize.ByteSize(header.Size)
    43  	}
    44  	if err == io.EOF {
    45  		err = nil
    46  	}
    47  	return size, err
    48  }
    49  
    50  // from https://github.com/mimoo/eureka/blob/master/folders.go under Apache 2
    51  //
    52  //nolint:gocyclo
    53  func compress(ctx context.Context, src string, buf io.Writer, max datasize.ByteSize) error {
    54  	_, span := system.NewSpan(ctx, system.GetTracer(), "pkg/util/targzip.compress")
    55  	defer span.End()
    56  
    57  	// tar > gzip > buf
    58  	zr := gzip.NewWriter(buf)
    59  	tw := tar.NewWriter(zr)
    60  
    61  	// is file a folder?
    62  	fi, err := os.Stat(src)
    63  	if err != nil {
    64  		return err
    65  	}
    66  	mode := fi.Mode()
    67  	if mode.IsRegular() {
    68  		if fi.Size() > int64(max) {
    69  			return fmt.Errorf("file %s bigger than max size %s", src, max.HumanReadable())
    70  		}
    71  		// get header
    72  		var header *tar.Header
    73  		header, err = tar.FileInfoHeader(fi, src)
    74  		if err != nil {
    75  			return err
    76  		}
    77  		// write header
    78  		if err = tw.WriteHeader(header); err != nil { //nolint:gocritic
    79  			return err
    80  		}
    81  		// get content
    82  		var data *os.File
    83  		data, err = os.Open(src)
    84  		if err != nil {
    85  			return err
    86  		}
    87  		defer closer.CloseWithLogOnError(fi.Name(), data)
    88  		if _, err = io.Copy(tw, data); err != nil {
    89  			return err
    90  		}
    91  	} else if mode.IsDir() { // folder
    92  		// walk through every file in the folder
    93  		err = filepath.Walk(src, func(file string, fi os.FileInfo, _ error) error {
    94  			// generate tar header
    95  			var header *tar.Header
    96  			header, err = tar.FileInfoHeader(fi, file)
    97  			if err != nil {
    98  				return err
    99  			}
   100  
   101  			// must provide real name
   102  			// (see https://golang.org/src/archive/tar/common.go?#L626)
   103  			header.Name = filepath.ToSlash(file)
   104  
   105  			// write header
   106  			if err = tw.WriteHeader(header); err != nil { //nolint:gocritic
   107  				return err
   108  			}
   109  			// if not a dir, write file content
   110  			if !fi.IsDir() {
   111  				var data *os.File
   112  				var fi os.FileInfo
   113  				fi, err = os.Stat(file)
   114  				if err != nil {
   115  					return err
   116  				}
   117  				if fi.Size() > int64(max) {
   118  					return fmt.Errorf("file %s bigger than max size %s", file, max.HumanReadable())
   119  				}
   120  				data, err = os.Open(file)
   121  				if err != nil {
   122  					return err
   123  				}
   124  				if _, err = io.Copy(tw, data); err != nil { //nolint:gocritic
   125  					return err
   126  				}
   127  				closer.CloseWithLogOnError(fi.Name(), data)
   128  			}
   129  			return nil
   130  		})
   131  		if err != nil {
   132  			return err
   133  		}
   134  	} else {
   135  		return fmt.Errorf("error: file type not supported")
   136  	}
   137  
   138  	// produce tar
   139  	if err := tw.Close(); err != nil {
   140  		return err
   141  	}
   142  	// produce gzip
   143  	if err := zr.Close(); err != nil {
   144  		return err
   145  	}
   146  	//
   147  	return nil
   148  }
   149  
   150  func decompress(src io.Reader, dst string, max datasize.ByteSize) error {
   151  	// ensure destination directory exists
   152  	err := os.Mkdir(dst, worldReadOwnerWritePermission)
   153  	if err != nil {
   154  		return err
   155  	}
   156  
   157  	// ungzip
   158  	zr, err := gzip.NewReader(src)
   159  	if err != nil {
   160  		return err
   161  	}
   162  	// untar
   163  	tr := tar.NewReader(zr)
   164  
   165  	// uncompress each element
   166  	for {
   167  		header, err := tr.Next()
   168  		if err == io.EOF {
   169  			break // End of archive
   170  		}
   171  		if err != nil {
   172  			return err
   173  		}
   174  		target := header.Name
   175  
   176  		// validate name against path traversal
   177  		if !validRelPath(header.Name) {
   178  			return fmt.Errorf("tar contained invalid name error %q", target)
   179  		}
   180  
   181  		// add dst + re-format slashes according to system
   182  		target, err = sanitizeArchivePath(dst, header.Name)
   183  		if err != nil {
   184  			return err
   185  		}
   186  		// if no join is needed, replace with ToSlash:
   187  		// target = filepath.ToSlash(header.Name)
   188  
   189  		// check the type
   190  		switch header.Typeflag {
   191  		// if its a dir and it doesn't exist create it (with 0755 permission)
   192  		case tar.TypeDir:
   193  			if _, err := os.Stat(target); err != nil {
   194  				if err := os.MkdirAll(target, worldReadOwnerWritePermission); err != nil {
   195  					return err
   196  				}
   197  			}
   198  		// if it's a file create it (with same permission)
   199  		case tar.TypeReg:
   200  			if header.Size > int64(max) {
   201  				return fmt.Errorf("file %s bigger than max size %s", header.Name, max.HumanReadable())
   202  			}
   203  			fileToWrite, err := os.OpenFile(target, os.O_CREATE|os.O_RDWR, os.FileMode(header.Mode))
   204  			if err != nil {
   205  				return err
   206  			}
   207  			// copy over contents (max 10MB per file!)
   208  			if _, err := io.CopyN(fileToWrite, tr, int64(max)); err != nil { //nolint:gomnd
   209  				// io.EOF is expected
   210  				if err != io.EOF {
   211  					return err
   212  				}
   213  			}
   214  			// manually close here after each file operation; defering would cause each file close
   215  			// to wait until all operations have completed.
   216  			fileToWrite.Close()
   217  		}
   218  	}
   219  
   220  	//
   221  	return nil
   222  }
   223  
   224  // check for path traversal and correct forward slashes
   225  func validRelPath(p string) bool {
   226  	if p == "" || strings.Contains(p, `\`) || strings.HasPrefix(p, "/") || strings.Contains(p, "../") {
   227  		return false
   228  	}
   229  	return true
   230  }
   231  
   232  // Sanitize archive file pathing from "G305: Zip Slip vulnerability"
   233  func sanitizeArchivePath(d, t string) (v string, err error) {
   234  	v = filepath.Join(d, t)
   235  	if strings.HasPrefix(v, filepath.Clean(d)) {
   236  		return v, nil
   237  	}
   238  
   239  	return "", fmt.Errorf("%s: %s", "content filepath is tainted", t)
   240  }