github.com/filecoin-project/bacalhau@v0.3.23-0.20230228154132-45c989550ace/pkg/util/targzip/targzip.go (about) 1 package targzip 2 3 import ( 4 "archive/tar" 5 "compress/gzip" 6 "context" 7 "fmt" 8 "io" 9 "io/fs" 10 "os" 11 "path/filepath" 12 "strings" 13 14 "github.com/c2h5oh/datasize" 15 "github.com/filecoin-project/bacalhau/pkg/system" 16 "github.com/filecoin-project/bacalhau/pkg/util/closer" 17 ) 18 19 const ( 20 MaximumContextSize datasize.ByteSize = 10 * datasize.MB 21 worldReadOwnerWritePermission fs.FileMode = 0755 22 ) 23 24 func Compress(ctx context.Context, src string, buf io.Writer) error { 25 return compress(ctx, src, buf, MaximumContextSize) 26 } 27 28 func Decompress(src io.Reader, dst string) error { 29 return decompress(src, dst, MaximumContextSize) 30 } 31 32 func UncompressedSize(src io.Reader) (datasize.ByteSize, error) { 33 var size datasize.ByteSize 34 zr, err := gzip.NewReader(src) 35 if err != nil { 36 return 0, err 37 } 38 tr := tar.NewReader(zr) 39 40 var header *tar.Header 41 for header, err = tr.Next(); err == nil; header, err = tr.Next() { 42 size += datasize.ByteSize(header.Size) 43 } 44 if err == io.EOF { 45 err = nil 46 } 47 return size, err 48 } 49 50 // from https://github.com/mimoo/eureka/blob/master/folders.go under Apache 2 51 // 52 //nolint:gocyclo 53 func compress(ctx context.Context, src string, buf io.Writer, max datasize.ByteSize) error { 54 _, span := system.NewSpan(ctx, system.GetTracer(), "pkg/util/targzip.compress") 55 defer span.End() 56 57 // tar > gzip > buf 58 zr := gzip.NewWriter(buf) 59 tw := tar.NewWriter(zr) 60 61 // is file a folder? 62 fi, err := os.Stat(src) 63 if err != nil { 64 return err 65 } 66 mode := fi.Mode() 67 if mode.IsRegular() { 68 if fi.Size() > int64(max) { 69 return fmt.Errorf("file %s bigger than max size %s", src, max.HumanReadable()) 70 } 71 // get header 72 var header *tar.Header 73 header, err = tar.FileInfoHeader(fi, src) 74 if err != nil { 75 return err 76 } 77 // write header 78 if err = tw.WriteHeader(header); err != nil { //nolint:gocritic 79 return err 80 } 81 // get content 82 var data *os.File 83 data, err = os.Open(src) 84 if err != nil { 85 return err 86 } 87 defer closer.CloseWithLogOnError(fi.Name(), data) 88 if _, err = io.Copy(tw, data); err != nil { 89 return err 90 } 91 } else if mode.IsDir() { // folder 92 // walk through every file in the folder 93 err = filepath.Walk(src, func(file string, fi os.FileInfo, _ error) error { 94 // generate tar header 95 var header *tar.Header 96 header, err = tar.FileInfoHeader(fi, file) 97 if err != nil { 98 return err 99 } 100 101 // must provide real name 102 // (see https://golang.org/src/archive/tar/common.go?#L626) 103 header.Name = filepath.ToSlash(file) 104 105 // write header 106 if err = tw.WriteHeader(header); err != nil { //nolint:gocritic 107 return err 108 } 109 // if not a dir, write file content 110 if !fi.IsDir() { 111 var data *os.File 112 var fi os.FileInfo 113 fi, err = os.Stat(file) 114 if err != nil { 115 return err 116 } 117 if fi.Size() > int64(max) { 118 return fmt.Errorf("file %s bigger than max size %s", file, max.HumanReadable()) 119 } 120 data, err = os.Open(file) 121 if err != nil { 122 return err 123 } 124 if _, err = io.Copy(tw, data); err != nil { //nolint:gocritic 125 return err 126 } 127 closer.CloseWithLogOnError(fi.Name(), data) 128 } 129 return nil 130 }) 131 if err != nil { 132 return err 133 } 134 } else { 135 return fmt.Errorf("error: file type not supported") 136 } 137 138 // produce tar 139 if err := tw.Close(); err != nil { 140 return err 141 } 142 // produce gzip 143 if err := zr.Close(); err != nil { 144 return err 145 } 146 // 147 return nil 148 } 149 150 func decompress(src io.Reader, dst string, max datasize.ByteSize) error { 151 // ensure destination directory exists 152 err := os.Mkdir(dst, worldReadOwnerWritePermission) 153 if err != nil { 154 return err 155 } 156 157 // ungzip 158 zr, err := gzip.NewReader(src) 159 if err != nil { 160 return err 161 } 162 // untar 163 tr := tar.NewReader(zr) 164 165 // uncompress each element 166 for { 167 header, err := tr.Next() 168 if err == io.EOF { 169 break // End of archive 170 } 171 if err != nil { 172 return err 173 } 174 target := header.Name 175 176 // validate name against path traversal 177 if !validRelPath(header.Name) { 178 return fmt.Errorf("tar contained invalid name error %q", target) 179 } 180 181 // add dst + re-format slashes according to system 182 target, err = sanitizeArchivePath(dst, header.Name) 183 if err != nil { 184 return err 185 } 186 // if no join is needed, replace with ToSlash: 187 // target = filepath.ToSlash(header.Name) 188 189 // check the type 190 switch header.Typeflag { 191 // if its a dir and it doesn't exist create it (with 0755 permission) 192 case tar.TypeDir: 193 if _, err := os.Stat(target); err != nil { 194 if err := os.MkdirAll(target, worldReadOwnerWritePermission); err != nil { 195 return err 196 } 197 } 198 // if it's a file create it (with same permission) 199 case tar.TypeReg: 200 if header.Size > int64(max) { 201 return fmt.Errorf("file %s bigger than max size %s", header.Name, max.HumanReadable()) 202 } 203 fileToWrite, err := os.OpenFile(target, os.O_CREATE|os.O_RDWR, os.FileMode(header.Mode)) 204 if err != nil { 205 return err 206 } 207 // copy over contents (max 10MB per file!) 208 if _, err := io.CopyN(fileToWrite, tr, int64(max)); err != nil { //nolint:gomnd 209 // io.EOF is expected 210 if err != io.EOF { 211 return err 212 } 213 } 214 // manually close here after each file operation; defering would cause each file close 215 // to wait until all operations have completed. 216 fileToWrite.Close() 217 } 218 } 219 220 // 221 return nil 222 } 223 224 // check for path traversal and correct forward slashes 225 func validRelPath(p string) bool { 226 if p == "" || strings.Contains(p, `\`) || strings.HasPrefix(p, "/") || strings.Contains(p, "../") { 227 return false 228 } 229 return true 230 } 231 232 // Sanitize archive file pathing from "G305: Zip Slip vulnerability" 233 func sanitizeArchivePath(d, t string) (v string, err error) { 234 v = filepath.Join(d, t) 235 if strings.HasPrefix(v, filepath.Clean(d)) { 236 return v, nil 237 } 238 239 return "", fmt.Errorf("%s: %s", "content filepath is tainted", t) 240 }