github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/internal/s3select/progress.go

// Copyright (c) 2015-2021 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program.  If not, see <http://www.gnu.org/licenses/>.

package s3select

import (
	"context"
	"errors"
	"fmt"
	"io"
	"runtime"
	"sync"
	"sync/atomic"

	"github.com/cosnicolaou/pbzip2"
	"github.com/klauspost/compress/s2"
	"github.com/klauspost/compress/zstd"
	gzip "github.com/klauspost/pgzip"
	"github.com/pierrec/lz4"
)

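// countUpReader wraps an io.Reader and atomically counts every byte read
// through it, so the running total can be queried concurrently via BytesRead.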
type countUpReader struct {
	reader    io.Reader
	bytesRead int64
}

// Max bzip2 concurrency across calls. 50% of GOMAXPROCS.
var bz2Limiter = pbzip2.CreateConcurrencyPool((runtime.GOMAXPROCS(0) + 1) / 2)

func (r *countUpReader) Read(p []byte) (n int, err error) {
	n, err = r.reader.Read(p)
	atomic.AddInt64(&r.bytesRead, int64(n))
	return n, err
}

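// BytesRead returns the number of bytes read so far. It only performs an
// atomic load, so it is safe to call while another goroutine is still
// reading, and it returns 0 on a nil receiver.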
func (r *countUpReader) BytesRead() int64 {
	if r == nil {
		return 0
	}
	return atomic.LoadInt64(&r.bytesRead)
}

func newCountUpReader(reader io.Reader) *countUpReader {
	return &countUpReader{
		reader: reader,
	}
}

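// progressReader tracks S3 Select progress over a possibly compressed input:
// scannedReader counts the raw (compressed) bytes read from rc, while
// processedReader counts the decompressed bytes handed onward for querying.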
type progressReader struct {
	rc              io.ReadCloser
	scannedReader   *countUpReader
	processedReader *countUpReader

	closedMu sync.Mutex
	closer   io.ReadCloser
	closed   bool
}

func (pr *progressReader) Read(p []byte) (n int, err error) {
	// This ensures that Close will block until Read has completed.
	// This allows another goroutine to close the reader.
	pr.closedMu.Lock()
	defer pr.closedMu.Unlock()
	if pr.closed {
		return 0, errors.New("progressReader: read after Close")
	}
	return pr.processedReader.Read(p)
}

func (pr *progressReader) Close() error {
	pr.closedMu.Lock()
	defer pr.closedMu.Unlock()
	if pr.closed {
		return nil
	}
	pr.closed = true
	if pr.closer != nil {
		pr.closer.Close()
	}
	return pr.rc.Close()
}

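// Stats reports the compressed bytes scanned from the source and the
// decompressed bytes processed so far. It relies on atomic loads only, so it
// can be polled from another goroutine without blocking an in-flight Read.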
func (pr *progressReader) Stats() (bytesScanned, bytesProcessed int64) {
	if pr == nil {
		return 0, 0
	}
	return pr.scannedReader.BytesRead(), pr.processedReader.BytesRead()
}

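// newProgressReader wraps rc in a progressReader for the given compression
// type, transparently decompressing while counting both the compressed
// (scanned) and decompressed (processed) bytes.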
func newProgressReader(rc io.ReadCloser, compType CompressionType) (*progressReader, error) {
	if rc == nil {
		return nil, errors.New("newProgressReader: nil reader provided")
	}
	scannedReader := newCountUpReader(rc)
	pr := progressReader{
		rc:            rc,
		scannedReader: scannedReader,
	}
	var r io.Reader

	switch compType {
	case noneType:
		r = scannedReader
	case gzipType:
		gzr, err := gzip.NewReader(scannedReader)
		if err != nil {
			if errors.Is(err, gzip.ErrHeader) || errors.Is(err, gzip.ErrChecksum) {
				return nil, errInvalidCompression(err, compType)
			}
			return nil, errTruncatedInput(err)
		}
		r = gzr
		pr.closer = gzr
	case bzip2Type:
		ctx, cancel := context.WithCancel(context.Background())
		r = pbzip2.NewReader(ctx, scannedReader, pbzip2.DecompressionOptions(
			pbzip2.BZConcurrency((runtime.GOMAXPROCS(0)+1)/2),
			pbzip2.BZConcurrencyPool(bz2Limiter),
		))
		pr.closer = &nopReadCloser{fn: cancel}
	case zstdType:
		// Set a max window of 64MB. More than reasonable.
		zr, err := zstd.NewReader(scannedReader, zstd.WithDecoderConcurrency(2), zstd.WithDecoderMaxWindow(64<<20))
		if err != nil {
			return nil, errInvalidCompression(err, compType)
		}
		r = zr
		pr.closer = zr.IOReadCloser()
	case lz4Type:
		r = lz4.NewReader(scannedReader)
	case s2Type:
		r = s2.NewReader(scannedReader)
	case snappyType:
		r = s2.NewReader(scannedReader, s2.ReaderMaxBlockSize(64<<10))
	default:
		return nil, errInvalidCompressionFormat(fmt.Errorf("unknown compression type '%v'", compType))
	}
	pr.processedReader = newCountUpReader(r)

	return &pr, nil
}
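
// Example (illustrative sketch, not part of the MinIO code): how a caller
// might use newProgressReader over an in-memory gzip payload and then read
// Stats. Assumes "bytes" is added to the import block; the payload and its
// size are made up for the example.
//
//	func exampleProgress() error {
//		// Build a small gzip-compressed payload in memory.
//		var buf bytes.Buffer
//		zw := gzip.NewWriter(&buf)
//		zw.Write(bytes.Repeat([]byte("hello s3select\n"), 1<<10))
//		zw.Close()
//
//		pr, err := newProgressReader(io.NopCloser(&buf), gzipType)
//		if err != nil {
//			return err
//		}
//		defer pr.Close()
//
//		// Drain the decompressed stream. Stats may also be polled from
//		// another goroutine, since it only performs atomic loads.
//		if _, err := io.Copy(io.Discard, pr); err != nil {
//			return err
//		}
//		scanned, processed := pr.Stats()
//		fmt.Printf("scanned=%d processed=%d\n", scanned, processed)
//		return nil
//	}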

// nopReadCloser adapts a cancel/cleanup function to io.ReadCloser so it can
// be stored in progressReader.closer. It is only ever closed, never read.
type nopReadCloser struct {
	fn func()
}

func (n2 *nopReadCloser) Read(p []byte) (n int, err error) {
	panic("should not be called")
}

func (n2 *nopReadCloser) Close() error {
	if n2.fn != nil {
		n2.fn()
	}
	n2.fn = nil
	return nil
}