github.com/sealerio/sealer@v0.11.1-0.20240507115618-f4f89c5853ae/utils/archive/compress.go (about) 1 // Copyright © 2021 Alibaba Group Holding Ltd. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package archive 16 17 import ( 18 "archive/tar" 19 "bufio" 20 "compress/gzip" 21 "errors" 22 "fmt" 23 "io" 24 "os" 25 "path/filepath" 26 "strings" 27 "syscall" 28 29 "golang.org/x/sys/unix" 30 31 "github.com/sirupsen/logrus" 32 33 "github.com/sealerio/sealer/common" 34 ) 35 36 const compressionBufSize = 32768 37 38 type Options struct { 39 Compress bool 40 KeepRootDir bool 41 ToStream bool 42 } 43 44 func validatePath(path string) error { 45 if _, err := os.Stat(path); err != nil { 46 return fmt.Errorf("dir %s does not exist, err: %s", path, err) 47 } 48 return nil 49 } 50 51 // TarWithRootDir 52 // src is the dir or single file to tar 53 // not contain the dir 54 // newFolder is a folder for tar file 55 func TarWithRootDir(paths ...string) (readCloser io.ReadCloser, err error) { 56 return compress(paths, Options{Compress: false, KeepRootDir: true}) 57 } 58 59 // TarWithoutRootDir function will tar files, but without keeping the original dir 60 // this is useful when we tar files at the build stage 61 func TarWithoutRootDir(paths ...string) (readCloser io.ReadCloser, err error) { 62 return compress(paths, Options{Compress: false, KeepRootDir: false}) 63 } 64 65 func Untar(src io.Reader, dst string) (int64, error) { 66 return Decompress(src, dst, Options{Compress: false}) 67 } 68 69 // GzipCompress make the tar stream to be gzip stream. 70 func GzipCompress(in io.Reader) (io.ReadCloser, chan struct{}) { 71 compressionDone := make(chan struct{}) 72 73 pipeReader, pipeWriter := io.Pipe() 74 // Use a bufio.Writer to avoid excessive chunking in HTTP request. 75 bufWriter := bufio.NewWriterSize(pipeWriter, compressionBufSize) 76 compressor := gzip.NewWriter(bufWriter) 77 78 go func() { 79 _, err := io.Copy(compressor, in) 80 if err == nil { 81 err = compressor.Close() 82 } 83 if err == nil { 84 err = bufWriter.Flush() 85 } 86 if err != nil { 87 // leave this err 88 _ = pipeWriter.CloseWithError(err) 89 } else { 90 err := pipeWriter.Close() 91 if err != nil { 92 return 93 } 94 } 95 close(compressionDone) 96 }() 97 98 return pipeReader, compressionDone 99 } 100 101 func compress(paths []string, options Options) (reader io.ReadCloser, err error) { 102 if len(paths) == 0 { 103 return nil, errors.New("[archive] source must be provided") 104 } 105 for _, path := range paths { 106 err = validatePath(path) 107 if err != nil { 108 return nil, err 109 } 110 } 111 112 pr, pw := io.Pipe() 113 tw := tar.NewWriter(pw) 114 bufWriter := bufio.NewWriterSize(nil, compressionBufSize) 115 if options.Compress { 116 tw = tar.NewWriter(gzip.NewWriter(pw)) 117 } 118 go func() { 119 defer func() { 120 err := tw.Close() 121 if err != nil { 122 return 123 } 124 err = pw.Close() 125 if err != nil { 126 return 127 } 128 }() 129 130 for _, path := range paths { 131 err = writeToTarWriter(path, tw, bufWriter, options) 132 if err != nil { 133 _ = pw.CloseWithError(err) 134 } 135 } 136 }() 137 138 return pr, nil 139 } 140 141 func writeWhiteout(header *tar.Header, fi os.FileInfo, path string) *tar.Header { 142 // overlay whiteout process 143 // this is a whiteout file 144 if fi.Mode()&os.ModeCharDevice != 0 && header.Devminor == 0 && header.Devmajor == 0 { 145 hName := header.Name 146 header.Name = filepath.Join(filepath.Dir(hName), WhiteoutPrefix+filepath.Base(hName)) 147 header.Mode = 0600 148 header.Typeflag = tar.TypeReg 149 header.Size = 0 150 } 151 152 var woh *tar.Header 153 if fi.Mode()&os.ModeDir != 0 { 154 opaque, walkErr := Lgetxattr(path, "trusted.overlay.opaque") 155 if walkErr != nil { 156 logrus.Debugf("failed to get trusted.overlay.opaque for %s at opaque, err: %v", path, walkErr) 157 } 158 159 if len(opaque) == 1 && opaque[0] == 'y' { 160 if header.PAXRecords != nil { 161 delete(header.PAXRecords, "trusted.overlay.opaque") 162 } 163 164 woh = &tar.Header{ 165 Typeflag: tar.TypeReg, 166 Mode: header.Mode & int64(os.ModePerm), 167 // #nosec 168 Name: filepath.Join(header.Name, WhiteoutOpaqueDir), 169 Size: 0, 170 Uid: header.Uid, 171 Uname: header.Uname, 172 Gid: header.Gid, 173 Gname: header.Gname, 174 AccessTime: header.AccessTime, 175 ChangeTime: header.ChangeTime, 176 } 177 } 178 } 179 return woh 180 } 181 182 func readWhiteout(hdr *tar.Header, path string) (bool, error) { 183 var ( 184 base = filepath.Base(path) 185 dir = filepath.Dir(path) 186 err error 187 ) 188 189 switch { 190 case base == WhiteoutOpaqueDir: 191 err = unix.Setxattr(dir, "trusted.overlay.opaque", []byte{'y'}, 0) 192 return false, err 193 case strings.HasPrefix(base, WhiteoutPrefix): 194 oBase := base[len(WhiteoutPrefix):] 195 oPath := filepath.Join(dir, oBase) 196 197 // make a whiteout file 198 err = unix.Mknod(oPath, unix.S_IFCHR, 0) 199 if err != nil { 200 return false, err 201 } 202 return false, os.Chown(oPath, hdr.Uid, hdr.Gid) 203 } 204 205 return true, nil 206 } 207 208 func writeToTarWriter(path string, tarWriter *tar.Writer, bufWriter *bufio.Writer, options Options) error { 209 var newFolder string 210 if options.KeepRootDir { 211 fi, err := os.Stat(path) 212 if err != nil { 213 return err 214 } 215 if fi.IsDir() { 216 newFolder = filepath.Base(path) 217 } 218 } 219 220 dir := strings.TrimSuffix(path, "/") 221 srcPrefix := filepath.ToSlash(dir + "/") 222 err := filepath.Walk(dir, func(file string, fi os.FileInfo, err error) error { 223 // generate tar header 224 header, walkErr := tar.FileInfoHeader(fi, file) 225 if walkErr != nil { 226 return walkErr 227 } 228 // root dir 229 if file != dir { 230 absPath := filepath.ToSlash(file) 231 header.Name = filepath.Join(newFolder, strings.TrimPrefix(absPath, srcPrefix)) 232 } else { 233 // do not contain root dir 234 if fi.IsDir() { 235 return nil 236 } 237 // for supporting tar single file 238 header.Name = filepath.Join(newFolder, filepath.Base(dir)) 239 } 240 // if current file is whiteout, the header has been changed, 241 // and we write a reg header into tar stream, but will not read its content 242 // cause doing so will lead to error. (its size is 0) 243 244 // if current target is dir, we will check if it is an opaque. 245 // and set add Suffix WhiteoutOpaqueDir for opaque. 246 // but we still need to write its original header into tar stream, 247 // because we need to create dir on this original header. 248 woh := writeWhiteout(header, fi, file) 249 walkErr = tarWriter.WriteHeader(header) 250 if walkErr != nil { 251 return fmt.Errorf("failed to write original header, path: %s, err: %v", file, walkErr) 252 } 253 // this is an opaque, write the opaque header, in order to set header.PAXRecords with trusted.overlay.opaque:y 254 // when decompress the tar stream. 255 if woh != nil { 256 walkErr = tarWriter.WriteHeader(woh) 257 if walkErr != nil { 258 return fmt.Errorf("failed to write opaque header, path: %s, err: %v", file, walkErr) 259 } 260 } 261 // if not a dir && size > 0, write file content 262 // the whiteout size is 0 263 if header.Typeflag == tar.TypeReg && header.Size > 0 { 264 var fHandler *os.File 265 fHandler, walkErr = os.Open(filepath.Clean(file)) 266 if walkErr != nil { 267 return walkErr 268 } 269 defer func() { 270 if err := fHandler.Close(); err != nil { 271 logrus.Errorf("failed to close file: %v", err) 272 } 273 }() 274 bufWriter.Reset(tarWriter) 275 defer bufWriter.Reset(nil) 276 277 _, walkErr = io.Copy(bufWriter, fHandler) 278 if walkErr != nil { 279 return walkErr 280 } 281 282 walkErr = bufWriter.Flush() 283 if walkErr != nil { 284 return walkErr 285 } 286 } 287 return nil 288 }) 289 290 return err 291 } 292 293 func removePreviousFiles(path string) error { 294 dir := filepath.Dir(path) 295 existPath := path 296 if base := filepath.Base(path); strings.HasPrefix(base, WhiteoutPrefix) { 297 existPath = filepath.Join(dir, strings.TrimPrefix(base, WhiteoutPrefix)) 298 } 299 300 if _, err := os.Stat(existPath); err == nil { 301 if err := os.RemoveAll(existPath); err != nil { 302 return err 303 } 304 } 305 return nil 306 } 307 308 // Decompress this will not change the metadata of original files 309 func Decompress(src io.Reader, dst string, options Options) (int64, error) { 310 // need to set umask to be 000 for current process. 311 // there will be some files having higher permission like 777, 312 // eventually permission will be set to 755 when umask is 022. 313 oldMask := syscall.Umask(0) 314 defer syscall.Umask(oldMask) 315 316 err := os.MkdirAll(dst, common.FileMode0755) 317 if err != nil { 318 return 0, err 319 } 320 321 reader := src 322 if options.Compress { 323 reader, err = gzip.NewReader(src) 324 if err != nil { 325 return 0, err 326 } 327 } 328 329 var ( 330 size int64 331 dirs []*tar.Header 332 tr = tar.NewReader(reader) 333 ) 334 for { 335 header, err := tr.Next() 336 if err == io.EOF { 337 break 338 } 339 if err != nil { 340 return 0, err 341 } 342 size += header.Size 343 // validate name against path traversal 344 if !validRelPath(header.Name) { 345 return 0, fmt.Errorf("tar contained invalid name error %q", header.Name) 346 } 347 348 // #nosec 349 target := filepath.Join(dst, header.Name) 350 err = removePreviousFiles(target) 351 if err != nil { 352 return 0, err 353 } 354 355 goon, err := readWhiteout(header, target) 356 if err != nil { 357 return 0, err 358 } 359 // it is an opaque / whiteout, don't write its file content. 360 if !goon { 361 continue 362 } 363 364 switch header.Typeflag { 365 case tar.TypeDir: 366 if _, err = os.Stat(target); err != nil { 367 if err = os.MkdirAll(target, os.FileMode(header.Mode)); err != nil { 368 return 0, err 369 } 370 dirs = append(dirs, header) 371 } 372 373 case tar.TypeReg: 374 err = func() error { 375 // regularly won't mkdir, unless add newFolder on compressing 376 inErr := os.MkdirAll(filepath.Dir(target), 0700|0055) 377 if inErr != nil { 378 return inErr 379 } 380 // #nosec 381 fileToWrite, inErr := os.OpenFile(target, os.O_CREATE|os.O_TRUNC|os.O_RDWR, os.FileMode(header.Mode)) 382 if inErr != nil { 383 return inErr 384 } 385 386 defer func() { 387 if err := fileToWrite.Close(); err != nil { 388 logrus.Errorf("failed to close file: %v", err) 389 } 390 }() 391 if _, inErr = io.Copy(fileToWrite, tr); inErr != nil { 392 return inErr 393 } 394 // for not changing 395 return os.Chtimes(target, header.AccessTime, header.ModTime) 396 }() 397 398 if err != nil { 399 return 0, err 400 } 401 } 402 } 403 404 for _, h := range dirs { 405 // #nosec 406 path := filepath.Join(dst, h.Name) 407 err = os.Chtimes(path, h.AccessTime, h.ModTime) 408 if err != nil { 409 return 0, err 410 } 411 } 412 413 return size, nil 414 } 415 416 // check for path traversal and correct forward slashes 417 func validRelPath(p string) bool { 418 if p == "" || strings.Contains(p, `\`) || strings.HasPrefix(p, "/") || strings.Contains(p, "../") { 419 return false 420 } 421 return true 422 }