github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/internal/s3select/progress.go (about) 1 // Copyright (c) 2015-2021 MinIO, Inc. 2 // 3 // This file is part of MinIO Object Storage stack 4 // 5 // This program is free software: you can redistribute it and/or modify 6 // it under the terms of the GNU Affero General Public License as published by 7 // the Free Software Foundation, either version 3 of the License, or 8 // (at your option) any later version. 9 // 10 // This program is distributed in the hope that it will be useful 11 // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 // GNU Affero General Public License for more details. 14 // 15 // You should have received a copy of the GNU Affero General Public License 16 // along with this program. If not, see <http://www.gnu.org/licenses/>. 17 18 package s3select 19 20 import ( 21 "context" 22 "errors" 23 "fmt" 24 "io" 25 "runtime" 26 "sync" 27 "sync/atomic" 28 29 "github.com/cosnicolaou/pbzip2" 30 "github.com/klauspost/compress/s2" 31 "github.com/klauspost/compress/zstd" 32 gzip "github.com/klauspost/pgzip" 33 "github.com/pierrec/lz4" 34 ) 35 36 type countUpReader struct { 37 reader io.Reader 38 bytesRead int64 39 } 40 41 // Max bzip2 concurrency across calls. 50% of GOMAXPROCS. 42 var bz2Limiter = pbzip2.CreateConcurrencyPool((runtime.GOMAXPROCS(0) + 1) / 2) 43 44 func (r *countUpReader) Read(p []byte) (n int, err error) { 45 n, err = r.reader.Read(p) 46 atomic.AddInt64(&r.bytesRead, int64(n)) 47 return n, err 48 } 49 50 func (r *countUpReader) BytesRead() int64 { 51 if r == nil { 52 return 0 53 } 54 return atomic.LoadInt64(&r.bytesRead) 55 } 56 57 func newCountUpReader(reader io.Reader) *countUpReader { 58 return &countUpReader{ 59 reader: reader, 60 } 61 } 62 63 type progressReader struct { 64 rc io.ReadCloser 65 scannedReader *countUpReader 66 processedReader *countUpReader 67 68 closedMu sync.Mutex 69 closer io.ReadCloser 70 closed bool 71 } 72 73 func (pr *progressReader) Read(p []byte) (n int, err error) { 74 // This ensures that Close will block until Read has completed. 75 // This allows another goroutine to close the reader. 76 pr.closedMu.Lock() 77 defer pr.closedMu.Unlock() 78 if pr.closed { 79 return 0, errors.New("progressReader: read after Close") 80 } 81 return pr.processedReader.Read(p) 82 } 83 84 func (pr *progressReader) Close() error { 85 pr.closedMu.Lock() 86 defer pr.closedMu.Unlock() 87 if pr.closed { 88 return nil 89 } 90 pr.closed = true 91 if pr.closer != nil { 92 pr.closer.Close() 93 } 94 return pr.rc.Close() 95 } 96 97 func (pr *progressReader) Stats() (bytesScanned, bytesProcessed int64) { 98 if pr == nil { 99 return 0, 0 100 } 101 return pr.scannedReader.BytesRead(), pr.processedReader.BytesRead() 102 } 103 104 func newProgressReader(rc io.ReadCloser, compType CompressionType) (*progressReader, error) { 105 if rc == nil { 106 return nil, errors.New("newProgressReader: nil reader provided") 107 } 108 scannedReader := newCountUpReader(rc) 109 pr := progressReader{ 110 rc: rc, 111 scannedReader: scannedReader, 112 } 113 var r io.Reader 114 115 switch compType { 116 case noneType: 117 r = scannedReader 118 case gzipType: 119 gzr, err := gzip.NewReader(scannedReader) 120 if err != nil { 121 if errors.Is(err, gzip.ErrHeader) || errors.Is(err, gzip.ErrChecksum) { 122 return nil, errInvalidCompression(err, compType) 123 } 124 return nil, errTruncatedInput(err) 125 } 126 r = gzr 127 pr.closer = gzr 128 case bzip2Type: 129 ctx, cancel := context.WithCancel(context.Background()) 130 r = pbzip2.NewReader(ctx, scannedReader, pbzip2.DecompressionOptions( 131 pbzip2.BZConcurrency((runtime.GOMAXPROCS(0)+1)/2), 132 pbzip2.BZConcurrencyPool(bz2Limiter), 133 )) 134 pr.closer = &nopReadCloser{fn: cancel} 135 case zstdType: 136 // Set a max window of 64MB. More than reasonable. 137 zr, err := zstd.NewReader(scannedReader, zstd.WithDecoderConcurrency(2), zstd.WithDecoderMaxWindow(64<<20)) 138 if err != nil { 139 return nil, errInvalidCompression(err, compType) 140 } 141 r = zr 142 pr.closer = zr.IOReadCloser() 143 case lz4Type: 144 r = lz4.NewReader(scannedReader) 145 case s2Type: 146 r = s2.NewReader(scannedReader) 147 case snappyType: 148 r = s2.NewReader(scannedReader, s2.ReaderMaxBlockSize(64<<10)) 149 default: 150 return nil, errInvalidCompressionFormat(fmt.Errorf("unknown compression type '%v'", compType)) 151 } 152 pr.processedReader = newCountUpReader(r) 153 154 return &pr, nil 155 } 156 157 type nopReadCloser struct { 158 fn func() 159 } 160 161 func (n2 *nopReadCloser) Read(p []byte) (n int, err error) { 162 panic("should not be called") 163 } 164 165 func (n2 *nopReadCloser) Close() error { 166 if n2.fn != nil { 167 n2.fn() 168 } 169 n2.fn = nil 170 return nil 171 }