github.com/sealerio/sealer@v0.11.1-0.20240507115618-f4f89c5853ae/utils/archive/compress.go (about)

     1  // Copyright © 2021 Alibaba Group Holding Ltd.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package archive
    16  
    17  import (
    18  	"archive/tar"
    19  	"bufio"
    20  	"compress/gzip"
    21  	"errors"
    22  	"fmt"
    23  	"io"
    24  	"os"
    25  	"path/filepath"
    26  	"strings"
    27  	"syscall"
    28  
    29  	"golang.org/x/sys/unix"
    30  
    31  	"github.com/sirupsen/logrus"
    32  
    33  	"github.com/sealerio/sealer/common"
    34  )
    35  
// compressionBufSize is the size in bytes (32 KiB) of the bufio.Writer
// buffers used in this file when streaming archive data.
const compressionBufSize = 32768
    37  
// Options controls how archives are produced by compress and consumed by
// Decompress.
type Options struct {
	// Compress selects gzip: compress wraps the tar stream in a gzip writer
	// and Decompress reads its input through a gzip reader.
	Compress    bool
	// KeepRootDir makes writeToTarWriter prefix entry names with the base
	// name of a source directory instead of storing them relative to it.
	KeepRootDir bool
	// ToStream is not read anywhere in this file.
	// NOTE(review): presumably consumed elsewhere in the project — confirm.
	ToStream    bool
}
    43  
    44  func validatePath(path string) error {
    45  	if _, err := os.Stat(path); err != nil {
    46  		return fmt.Errorf("dir %s does not exist, err: %s", path, err)
    47  	}
    48  	return nil
    49  }
    50  
    51  // TarWithRootDir
    52  // src is the dir or single file to tar
    53  // not contain the dir
    54  // newFolder is a folder for tar file
    55  func TarWithRootDir(paths ...string) (readCloser io.ReadCloser, err error) {
    56  	return compress(paths, Options{Compress: false, KeepRootDir: true})
    57  }
    58  
    59  // TarWithoutRootDir function will tar files, but without keeping the original dir
    60  // this is useful when we tar files at the build stage
    61  func TarWithoutRootDir(paths ...string) (readCloser io.ReadCloser, err error) {
    62  	return compress(paths, Options{Compress: false, KeepRootDir: false})
    63  }
    64  
    65  func Untar(src io.Reader, dst string) (int64, error) {
    66  	return Decompress(src, dst, Options{Compress: false})
    67  }
    68  
    69  // GzipCompress make the tar stream to be gzip stream.
    70  func GzipCompress(in io.Reader) (io.ReadCloser, chan struct{}) {
    71  	compressionDone := make(chan struct{})
    72  
    73  	pipeReader, pipeWriter := io.Pipe()
    74  	// Use a bufio.Writer to avoid excessive chunking in HTTP request.
    75  	bufWriter := bufio.NewWriterSize(pipeWriter, compressionBufSize)
    76  	compressor := gzip.NewWriter(bufWriter)
    77  
    78  	go func() {
    79  		_, err := io.Copy(compressor, in)
    80  		if err == nil {
    81  			err = compressor.Close()
    82  		}
    83  		if err == nil {
    84  			err = bufWriter.Flush()
    85  		}
    86  		if err != nil {
    87  			// leave this err
    88  			_ = pipeWriter.CloseWithError(err)
    89  		} else {
    90  			err := pipeWriter.Close()
    91  			if err != nil {
    92  				return
    93  			}
    94  		}
    95  		close(compressionDone)
    96  	}()
    97  
    98  	return pipeReader, compressionDone
    99  }
   100  
   101  func compress(paths []string, options Options) (reader io.ReadCloser, err error) {
   102  	if len(paths) == 0 {
   103  		return nil, errors.New("[archive] source must be provided")
   104  	}
   105  	for _, path := range paths {
   106  		err = validatePath(path)
   107  		if err != nil {
   108  			return nil, err
   109  		}
   110  	}
   111  
   112  	pr, pw := io.Pipe()
   113  	tw := tar.NewWriter(pw)
   114  	bufWriter := bufio.NewWriterSize(nil, compressionBufSize)
   115  	if options.Compress {
   116  		tw = tar.NewWriter(gzip.NewWriter(pw))
   117  	}
   118  	go func() {
   119  		defer func() {
   120  			err := tw.Close()
   121  			if err != nil {
   122  				return
   123  			}
   124  			err = pw.Close()
   125  			if err != nil {
   126  				return
   127  			}
   128  		}()
   129  
   130  		for _, path := range paths {
   131  			err = writeToTarWriter(path, tw, bufWriter, options)
   132  			if err != nil {
   133  				_ = pw.CloseWithError(err)
   134  			}
   135  		}
   136  	}()
   137  
   138  	return pr, nil
   139  }
   140  
   141  func writeWhiteout(header *tar.Header, fi os.FileInfo, path string) *tar.Header {
   142  	// overlay whiteout process
   143  	// this is a whiteout file
   144  	if fi.Mode()&os.ModeCharDevice != 0 && header.Devminor == 0 && header.Devmajor == 0 {
   145  		hName := header.Name
   146  		header.Name = filepath.Join(filepath.Dir(hName), WhiteoutPrefix+filepath.Base(hName))
   147  		header.Mode = 0600
   148  		header.Typeflag = tar.TypeReg
   149  		header.Size = 0
   150  	}
   151  
   152  	var woh *tar.Header
   153  	if fi.Mode()&os.ModeDir != 0 {
   154  		opaque, walkErr := Lgetxattr(path, "trusted.overlay.opaque")
   155  		if walkErr != nil {
   156  			logrus.Debugf("failed to get trusted.overlay.opaque for %s at opaque, err: %v", path, walkErr)
   157  		}
   158  
   159  		if len(opaque) == 1 && opaque[0] == 'y' {
   160  			if header.PAXRecords != nil {
   161  				delete(header.PAXRecords, "trusted.overlay.opaque")
   162  			}
   163  
   164  			woh = &tar.Header{
   165  				Typeflag: tar.TypeReg,
   166  				Mode:     header.Mode & int64(os.ModePerm),
   167  				// #nosec
   168  				Name:       filepath.Join(header.Name, WhiteoutOpaqueDir),
   169  				Size:       0,
   170  				Uid:        header.Uid,
   171  				Uname:      header.Uname,
   172  				Gid:        header.Gid,
   173  				Gname:      header.Gname,
   174  				AccessTime: header.AccessTime,
   175  				ChangeTime: header.ChangeTime,
   176  			}
   177  		}
   178  	}
   179  	return woh
   180  }
   181  
   182  func readWhiteout(hdr *tar.Header, path string) (bool, error) {
   183  	var (
   184  		base = filepath.Base(path)
   185  		dir  = filepath.Dir(path)
   186  		err  error
   187  	)
   188  
   189  	switch {
   190  	case base == WhiteoutOpaqueDir:
   191  		err = unix.Setxattr(dir, "trusted.overlay.opaque", []byte{'y'}, 0)
   192  		return false, err
   193  	case strings.HasPrefix(base, WhiteoutPrefix):
   194  		oBase := base[len(WhiteoutPrefix):]
   195  		oPath := filepath.Join(dir, oBase)
   196  
   197  		// make a whiteout file
   198  		err = unix.Mknod(oPath, unix.S_IFCHR, 0)
   199  		if err != nil {
   200  			return false, err
   201  		}
   202  		return false, os.Chown(oPath, hdr.Uid, hdr.Gid)
   203  	}
   204  
   205  	return true, nil
   206  }
   207  
   208  func writeToTarWriter(path string, tarWriter *tar.Writer, bufWriter *bufio.Writer, options Options) error {
   209  	var newFolder string
   210  	if options.KeepRootDir {
   211  		fi, err := os.Stat(path)
   212  		if err != nil {
   213  			return err
   214  		}
   215  		if fi.IsDir() {
   216  			newFolder = filepath.Base(path)
   217  		}
   218  	}
   219  
   220  	dir := strings.TrimSuffix(path, "/")
   221  	srcPrefix := filepath.ToSlash(dir + "/")
   222  	err := filepath.Walk(dir, func(file string, fi os.FileInfo, err error) error {
   223  		// generate tar header
   224  		header, walkErr := tar.FileInfoHeader(fi, file)
   225  		if walkErr != nil {
   226  			return walkErr
   227  		}
   228  		// root dir
   229  		if file != dir {
   230  			absPath := filepath.ToSlash(file)
   231  			header.Name = filepath.Join(newFolder, strings.TrimPrefix(absPath, srcPrefix))
   232  		} else {
   233  			// do not contain root dir
   234  			if fi.IsDir() {
   235  				return nil
   236  			}
   237  			// for supporting tar single file
   238  			header.Name = filepath.Join(newFolder, filepath.Base(dir))
   239  		}
   240  		// if current file is whiteout, the header has been changed,
   241  		// and we write a reg header into tar stream, but will not read its content
   242  		// cause doing so will lead to error. (its size is 0)
   243  
   244  		// if current target is dir, we will check if it is an opaque.
   245  		// and set add Suffix WhiteoutOpaqueDir for opaque.
   246  		// but we still need to write its original header into tar stream,
   247  		// because we need to create dir on this original header.
   248  		woh := writeWhiteout(header, fi, file)
   249  		walkErr = tarWriter.WriteHeader(header)
   250  		if walkErr != nil {
   251  			return fmt.Errorf("failed to write original header, path: %s, err: %v", file, walkErr)
   252  		}
   253  		// this is an opaque, write the opaque header, in order to set header.PAXRecords with trusted.overlay.opaque:y
   254  		// when decompress the tar stream.
   255  		if woh != nil {
   256  			walkErr = tarWriter.WriteHeader(woh)
   257  			if walkErr != nil {
   258  				return fmt.Errorf("failed to write opaque header, path: %s, err: %v", file, walkErr)
   259  			}
   260  		}
   261  		// if not a dir && size > 0, write file content
   262  		// the whiteout size is 0
   263  		if header.Typeflag == tar.TypeReg && header.Size > 0 {
   264  			var fHandler *os.File
   265  			fHandler, walkErr = os.Open(filepath.Clean(file))
   266  			if walkErr != nil {
   267  				return walkErr
   268  			}
   269  			defer func() {
   270  				if err := fHandler.Close(); err != nil {
   271  					logrus.Errorf("failed to close file: %v", err)
   272  				}
   273  			}()
   274  			bufWriter.Reset(tarWriter)
   275  			defer bufWriter.Reset(nil)
   276  
   277  			_, walkErr = io.Copy(bufWriter, fHandler)
   278  			if walkErr != nil {
   279  				return walkErr
   280  			}
   281  
   282  			walkErr = bufWriter.Flush()
   283  			if walkErr != nil {
   284  				return walkErr
   285  			}
   286  		}
   287  		return nil
   288  	})
   289  
   290  	return err
   291  }
   292  
   293  func removePreviousFiles(path string) error {
   294  	dir := filepath.Dir(path)
   295  	existPath := path
   296  	if base := filepath.Base(path); strings.HasPrefix(base, WhiteoutPrefix) {
   297  		existPath = filepath.Join(dir, strings.TrimPrefix(base, WhiteoutPrefix))
   298  	}
   299  
   300  	if _, err := os.Stat(existPath); err == nil {
   301  		if err := os.RemoveAll(existPath); err != nil {
   302  			return err
   303  		}
   304  	}
   305  	return nil
   306  }
   307  
   308  // Decompress this will not change the metadata of original files
   309  func Decompress(src io.Reader, dst string, options Options) (int64, error) {
   310  	// need to set umask to be 000 for current process.
   311  	// there will be some files having higher permission like 777,
   312  	// eventually permission will be set to 755 when umask is 022.
   313  	oldMask := syscall.Umask(0)
   314  	defer syscall.Umask(oldMask)
   315  
   316  	err := os.MkdirAll(dst, common.FileMode0755)
   317  	if err != nil {
   318  		return 0, err
   319  	}
   320  
   321  	reader := src
   322  	if options.Compress {
   323  		reader, err = gzip.NewReader(src)
   324  		if err != nil {
   325  			return 0, err
   326  		}
   327  	}
   328  
   329  	var (
   330  		size int64
   331  		dirs []*tar.Header
   332  		tr   = tar.NewReader(reader)
   333  	)
   334  	for {
   335  		header, err := tr.Next()
   336  		if err == io.EOF {
   337  			break
   338  		}
   339  		if err != nil {
   340  			return 0, err
   341  		}
   342  		size += header.Size
   343  		// validate name against path traversal
   344  		if !validRelPath(header.Name) {
   345  			return 0, fmt.Errorf("tar contained invalid name error %q", header.Name)
   346  		}
   347  
   348  		// #nosec
   349  		target := filepath.Join(dst, header.Name)
   350  		err = removePreviousFiles(target)
   351  		if err != nil {
   352  			return 0, err
   353  		}
   354  
   355  		goon, err := readWhiteout(header, target)
   356  		if err != nil {
   357  			return 0, err
   358  		}
   359  		// it is an opaque / whiteout, don't write its file content.
   360  		if !goon {
   361  			continue
   362  		}
   363  
   364  		switch header.Typeflag {
   365  		case tar.TypeDir:
   366  			if _, err = os.Stat(target); err != nil {
   367  				if err = os.MkdirAll(target, os.FileMode(header.Mode)); err != nil {
   368  					return 0, err
   369  				}
   370  				dirs = append(dirs, header)
   371  			}
   372  
   373  		case tar.TypeReg:
   374  			err = func() error {
   375  				// regularly won't mkdir, unless add newFolder on compressing
   376  				inErr := os.MkdirAll(filepath.Dir(target), 0700|0055)
   377  				if inErr != nil {
   378  					return inErr
   379  				}
   380  				// #nosec
   381  				fileToWrite, inErr := os.OpenFile(target, os.O_CREATE|os.O_TRUNC|os.O_RDWR, os.FileMode(header.Mode))
   382  				if inErr != nil {
   383  					return inErr
   384  				}
   385  
   386  				defer func() {
   387  					if err := fileToWrite.Close(); err != nil {
   388  						logrus.Errorf("failed to close file: %v", err)
   389  					}
   390  				}()
   391  				if _, inErr = io.Copy(fileToWrite, tr); inErr != nil {
   392  					return inErr
   393  				}
   394  				// for not changing
   395  				return os.Chtimes(target, header.AccessTime, header.ModTime)
   396  			}()
   397  
   398  			if err != nil {
   399  				return 0, err
   400  			}
   401  		}
   402  	}
   403  
   404  	for _, h := range dirs {
   405  		// #nosec
   406  		path := filepath.Join(dst, h.Name)
   407  		err = os.Chtimes(path, h.AccessTime, h.ModTime)
   408  		if err != nil {
   409  			return 0, err
   410  		}
   411  	}
   412  
   413  	return size, nil
   414  }
   415  
   416  // check for path traversal and correct forward slashes
   417  func validRelPath(p string) bool {
   418  	if p == "" || strings.Contains(p, `\`) || strings.HasPrefix(p, "/") || strings.Contains(p, "../") {
   419  		return false
   420  	}
   421  	return true
   422  }