github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/cmd/untar.go (about) 1 // Copyright (c) 2015-2021 MinIO, Inc. 2 // 3 // This file is part of MinIO Object Storage stack 4 // 5 // This program is free software: you can redistribute it and/or modify 6 // it under the terms of the GNU Affero General Public License as published by 7 // the Free Software Foundation, either version 3 of the License, or 8 // (at your option) any later version. 9 // 10 // This program is distributed in the hope that it will be useful 11 // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 // GNU Affero General Public License for more details. 14 // 15 // You should have received a copy of the GNU Affero General Public License 16 // along with this program. If not, see <http://www.gnu.org/licenses/>. 17 18 package cmd 19 20 import ( 21 "archive/tar" 22 "bufio" 23 "bytes" 24 "context" 25 "errors" 26 "fmt" 27 "io" 28 "io/fs" 29 "os" 30 "path" 31 "runtime" 32 "sync" 33 "time" 34 35 "github.com/cosnicolaou/pbzip2" 36 "github.com/klauspost/compress/s2" 37 "github.com/klauspost/compress/zstd" 38 gzip "github.com/klauspost/pgzip" 39 "github.com/minio/minio/internal/logger" 40 "github.com/pierrec/lz4" 41 ) 42 43 // Max bzip2 concurrency across calls. 50% of GOMAXPROCS. 44 var bz2Limiter = pbzip2.CreateConcurrencyPool((runtime.GOMAXPROCS(0) + 1) / 2) 45 46 func detect(r *bufio.Reader) format { 47 z, err := r.Peek(4) 48 if err != nil { 49 return formatUnknown 50 } 51 for _, f := range magicHeaders { 52 if bytes.Equal(f.header, z[:len(f.header)]) { 53 return f.f 54 } 55 } 56 return formatUnknown 57 } 58 59 //go:generate stringer -type=format -trimprefix=format $GOFILE 60 type format int 61 62 const ( 63 formatUnknown format = iota 64 formatGzip 65 formatZstd 66 formatLZ4 67 formatS2 68 formatBZ2 69 ) 70 71 var magicHeaders = []struct { 72 header []byte 73 f format 74 }{ 75 { 76 header: []byte{0x1f, 0x8b, 8}, 77 f: formatGzip, 78 }, 79 { 80 // Zstd default header. 81 header: []byte{0x28, 0xb5, 0x2f, 0xfd}, 82 f: formatZstd, 83 }, 84 { 85 // Zstd skippable frame header. 86 header: []byte{0x2a, 0x4d, 0x18}, 87 f: formatZstd, 88 }, 89 { 90 // LZ4 91 header: []byte{0x4, 0x22, 0x4d, 0x18}, 92 f: formatLZ4, 93 }, 94 { 95 // Snappy/S2 stream 96 header: []byte{0xff, 0x06, 0x00, 0x00}, 97 f: formatS2, 98 }, 99 { 100 header: []byte{0x42, 0x5a, 'h'}, 101 f: formatBZ2, 102 }, 103 } 104 105 type untarOptions struct { 106 ignoreDirs bool 107 ignoreErrs bool 108 prefixAll string 109 } 110 111 // disconnectReader will ensure that no reads can take place on 112 // the upstream reader after close has been called. 113 type disconnectReader struct { 114 r io.Reader 115 mu sync.Mutex 116 } 117 118 func (d *disconnectReader) Read(p []byte) (n int, err error) { 119 d.mu.Lock() 120 defer d.mu.Unlock() 121 if d.r != nil { 122 return d.r.Read(p) 123 } 124 return 0, errors.New("reader closed") 125 } 126 127 func (d *disconnectReader) Close() error { 128 d.mu.Lock() 129 d.r = nil 130 d.mu.Unlock() 131 return nil 132 } 133 134 func untar(ctx context.Context, r io.Reader, putObject func(reader io.Reader, info os.FileInfo, name string) error, o untarOptions) error { 135 bf := bufio.NewReader(r) 136 switch f := detect(bf); f { 137 case formatGzip: 138 gz, err := gzip.NewReader(bf) 139 if err != nil { 140 return err 141 } 142 defer gz.Close() 143 r = gz 144 case formatS2: 145 r = s2.NewReader(bf) 146 case formatZstd: 147 // Limit to 16 MiB per stream. 148 dec, err := zstd.NewReader(bf, zstd.WithDecoderMaxWindow(16<<20)) 149 if err != nil { 150 return err 151 } 152 defer dec.Close() 153 r = dec 154 case formatBZ2: 155 ctx, cancel := context.WithCancel(ctx) 156 defer cancel() 157 r = pbzip2.NewReader(ctx, bf, pbzip2.DecompressionOptions( 158 pbzip2.BZConcurrency((runtime.GOMAXPROCS(0)+1)/2), 159 pbzip2.BZConcurrencyPool(bz2Limiter))) 160 case formatLZ4: 161 r = lz4.NewReader(bf) 162 case formatUnknown: 163 r = bf 164 default: 165 return fmt.Errorf("Unsupported format %s", f) 166 } 167 tarReader := tar.NewReader(r) 168 n := 0 169 asyncWriters := make(chan struct{}, 16) 170 var wg sync.WaitGroup 171 172 var asyncErr error 173 var asyncErrMu sync.Mutex 174 for { 175 if !o.ignoreErrs { 176 asyncErrMu.Lock() 177 err := asyncErr 178 asyncErrMu.Unlock() 179 if err != nil { 180 return err 181 } 182 } 183 184 header, err := tarReader.Next() 185 switch { 186 187 // if no more files are found return 188 case err == io.EOF: 189 wg.Wait() 190 return asyncErr 191 192 // return any other error 193 case err != nil: 194 wg.Wait() 195 extra := "" 196 if n > 0 { 197 extra = fmt.Sprintf(" after %d successful object(s)", n) 198 } 199 return fmt.Errorf("tar file error: %w%s", err, extra) 200 201 // if the header is nil, just skip it (not sure how this happens) 202 case header == nil: 203 continue 204 } 205 206 name := header.Name 207 switch path.Clean(name) { 208 case ".", slashSeparator: 209 continue 210 } 211 212 switch header.Typeflag { 213 case tar.TypeDir: // = directory 214 if o.ignoreDirs { 215 continue 216 } 217 name = trimLeadingSlash(pathJoin(name, slashSeparator)) 218 case tar.TypeReg, tar.TypeChar, tar.TypeBlock, tar.TypeFifo, tar.TypeGNUSparse: // = regular 219 name = trimLeadingSlash(path.Clean(name)) 220 default: 221 // ignore symlink'ed 222 continue 223 } 224 if o.prefixAll != "" { 225 name = pathJoin(o.prefixAll, name) 226 } 227 228 // Do small files async 229 n++ 230 if header.Size <= smallFileThreshold { 231 asyncWriters <- struct{}{} 232 b := poolBuf128k.Get().([]byte) 233 if cap(b) < int(header.Size) { 234 b = make([]byte, smallFileThreshold) 235 } 236 b = b[:header.Size] 237 if _, err := io.ReadFull(tarReader, b); err != nil { 238 return err 239 } 240 wg.Add(1) 241 go func(name string, fi fs.FileInfo, b []byte) { 242 rc := disconnectReader{r: bytes.NewReader(b)} 243 defer func() { 244 rc.Close() 245 <-asyncWriters 246 wg.Done() 247 //nolint:staticcheck // SA6002 we are fine with the tiny alloc 248 poolBuf128k.Put(b) 249 }() 250 if err := putObject(&rc, fi, name); err != nil { 251 if o.ignoreErrs { 252 logger.LogIf(ctx, err) 253 return 254 } 255 asyncErrMu.Lock() 256 if asyncErr == nil { 257 asyncErr = err 258 } 259 asyncErrMu.Unlock() 260 } 261 }(name, header.FileInfo(), b) 262 continue 263 } 264 265 // If zero or earlier modtime, set to current. 266 // Otherwise the resulting objects will be invalid. 267 if header.ModTime.UnixNano() <= 0 { 268 header.ModTime = time.Now() 269 } 270 271 // Sync upload. 272 rc := disconnectReader{r: tarReader} 273 if err := putObject(&rc, header.FileInfo(), name); err != nil { 274 rc.Close() 275 if o.ignoreErrs { 276 logger.LogIf(ctx, err) 277 continue 278 } 279 return err 280 } 281 rc.Close() 282 } 283 }