github.com/bazelbuild/remote-apis-sdks@v0.0.0-20240425170053-8a36686a6350/go/pkg/reader/reader.go (about)

     1  package reader
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"errors"
     7  	"fmt"
     8  	"io"
     9  	"os"
    10  	"sync"
    11  
    12  	log "github.com/golang/glog"
    13  	"github.com/klauspost/compress/zstd"
    14  )
    15  
// errNotInitialized is the error returned from Read() by a ReadSeeker that
// hasn't yet had Initialize() called.
//
// stylecheck is disabled because the error text starts with a capital letter,
// and changing the text would be an API change.
var errNotInitialized = errors.New("Not yet initialized") // nolint:stylecheck
    22  
// Initializable is an interface containing methods to initialize a ReadSeeker.
type Initializable interface {
	// IsInitialized reports whether the implementation is ready for use.
	IsInitialized() bool
	// Initialize performs the setup work needed before use and returns any
	// error encountered while doing so.
	Initialize() error
}
    28  
// ReadSeeker is an interface used to capture a file reader with seek functionality.
// It combines io.Reader and io.Closer with the Initializable methods.
type ReadSeeker interface {
	io.Reader
	io.Closer
	Initializable
	// SeekOffset repositions the reader at offset. The file-backed
	// implementation in this file interprets offset relative to the start of
	// the file.
	SeekOffset(offset int64) error
}
    36  
// fileSeeker is a ReadSeeker backed by a file on disk. The file is opened by
// Initialize rather than at construction time.
type fileSeeker struct {
	// reader buffers reads from f. It is nil until Initialize succeeds, and
	// SeekOffset and Close set it back to nil.
	reader *bufio.Reader

	// f is the open file handle, or nil when no file is open.
	f           *os.File
	// path is the file path handed to os.Open by Initialize.
	path        string
	// buffSize is the size handed to bufio.NewReaderSize by Initialize.
	buffSize    int
	// seekOffset is the offset from the start of the file that Initialize
	// seeks to.
	seekOffset  int64
	// initialized is true after a successful Initialize and false again after
	// SeekOffset or Close.
	initialized bool
}
    46  
    47  // NewFileReadSeeker wraps a buffered file reader with Seeking functionality.
    48  // Notice that Seek calls un-set the reader and require Initialize calls. This
    49  // is to avoid potentially unnecessary disk IO.
    50  func NewFileReadSeeker(path string, buffsize int) ReadSeeker {
    51  	return &fileSeeker{
    52  		f:           nil,
    53  		path:        path,
    54  		buffSize:    buffsize,
    55  		seekOffset:  0,
    56  		initialized: false,
    57  	}
    58  }
    59  
    60  // Close closes the reader. It still can be reopened with Initialize().
    61  func (fio *fileSeeker) Close() (err error) {
    62  	fio.initialized = false
    63  	if fio.f != nil {
    64  		err = fio.f.Close()
    65  	}
    66  	fio.f = nil
    67  	fio.reader = nil
    68  	return err
    69  }
    70  
    71  // Read implements io.Reader.
    72  func (fio *fileSeeker) Read(p []byte) (int, error) {
    73  	if !fio.IsInitialized() {
    74  		return 0, errNotInitialized
    75  	}
    76  
    77  	return fio.reader.Read(p)
    78  }
    79  
    80  // Seek is a simplified version of io.Seeker. It only supports offsets from the
    81  // beginning of the file, and it errors lazily at the next Initialize.
    82  func (fio *fileSeeker) SeekOffset(offset int64) error {
    83  	fio.seekOffset = offset
    84  	fio.initialized = false
    85  	fio.reader = nil
    86  	return nil
    87  }
    88  
// IsInitialized indicates whether this reader is ready. If false, Read calls
// will fail with errNotInitialized until Initialize succeeds.
func (fio *fileSeeker) IsInitialized() bool {
	return fio.initialized
}
    94  
    95  // Initialize does the required IO pre-work for Read calls to function.
    96  func (fio *fileSeeker) Initialize() error {
    97  	if fio.initialized {
    98  		return errors.New("Already initialized")
    99  	}
   100  
   101  	if fio.f == nil {
   102  		var err error
   103  		fio.f, err = os.Open(fio.path)
   104  		if err != nil {
   105  			return err
   106  		}
   107  	}
   108  
   109  	off, err := fio.f.Seek(fio.seekOffset, io.SeekStart)
   110  	if err != nil {
   111  		return err
   112  	}
   113  	if off != fio.seekOffset {
   114  		return fmt.Errorf("File seeking ended at %d. Expected %d,", off, fio.seekOffset)
   115  	}
   116  
   117  	if fio.reader == nil {
   118  		fio.reader = bufio.NewReaderSize(fio.f, fio.buffSize)
   119  	} else {
   120  		fio.reader.Reset(fio.f)
   121  	}
   122  	fio.initialized = true
   123  	return nil
   124  }
   125  
   126  // The zstd encoder lib will async write to the buffer, so we need
   127  // to lock access to actually check for contents.
   128  type syncedBuffer struct {
   129  	mu  sync.Mutex
   130  	buf *bytes.Buffer
   131  }
   132  
   133  func (sb *syncedBuffer) Read(p []byte) (int, error) {
   134  	sb.mu.Lock()
   135  	defer sb.mu.Unlock()
   136  	return sb.buf.Read(p)
   137  }
   138  
   139  func (sb *syncedBuffer) Write(p []byte) (int, error) {
   140  	sb.mu.Lock()
   141  	defer sb.mu.Unlock()
   142  	return sb.buf.Write(p)
   143  }
   144  
   145  func (sb *syncedBuffer) Len() int {
   146  	sb.mu.Lock()
   147  	defer sb.mu.Unlock()
   148  	return sb.buf.Len()
   149  }
   150  
   151  func (sb *syncedBuffer) Reset() {
   152  	sb.mu.Lock()
   153  	defer sb.mu.Unlock()
   154  	sb.buf.Reset()
   155  }
   156  
// compressedSeeker is a ReadSeeker that wraps another ReadSeeker and serves
// the zstd-encoded form of the data read from it.
type compressedSeeker struct {
	// fs is the wrapped source of uncompressed data.
	fs    ReadSeeker
	// encdW is the encoder that writes into buf. Read sets it to nil after
	// handing the encoder back to the pool on error or EOF.
	encdW *zstd.Encoder
	// This keeps the compressed data
	buf *syncedBuffer
}
   163  
// encoderInit guards the one-time creation of the encoders pool.
var encoderInit sync.Once

// encoders is a pool of reusable zstd encoders. It is nil until the first call
// to NewCompressedSeeker creates it.
var encoders *sync.Pool
   166  
   167  // NewCompressedFileSeeker creates a ReadSeeker based on a file path.
   168  func NewCompressedFileSeeker(path string, buffsize int) (ReadSeeker, error) {
   169  	return NewCompressedSeeker(NewFileReadSeeker(path, buffsize))
   170  }
   171  
   172  // NewCompressedSeeker wraps a ReadSeeker to compress its data on the fly.
   173  func NewCompressedSeeker(fs ReadSeeker) (ReadSeeker, error) {
   174  	if _, ok := fs.(*compressedSeeker); ok {
   175  		return nil, errors.New("trying to double compress files")
   176  	}
   177  
   178  	encoderInit.Do(func() {
   179  		encoders = &sync.Pool{
   180  			New: func() interface{} {
   181  				e, err := zstd.NewWriter(nil, zstd.WithEncoderConcurrency(1))
   182  				if err != nil {
   183  					log.Errorf("Error creating new encoder: %v", err)
   184  					return nil
   185  				}
   186  				return e
   187  			},
   188  		}
   189  	})
   190  
   191  	buf := bytes.NewBuffer(nil)
   192  	sb := &syncedBuffer{buf: buf}
   193  
   194  	encdIntf := encoders.Get()
   195  	encdW, ok := encdIntf.(*zstd.Encoder)
   196  	if !ok || encdW == nil {
   197  		return nil, errors.New("failed creating new encoder")
   198  	}
   199  
   200  	encdW.Reset(sb)
   201  	return &compressedSeeker{
   202  		fs:    fs,
   203  		encdW: encdW,
   204  		buf:   sb,
   205  	}, nil
   206  }
   207  
// Read implements io.Reader. It reads uncompressed data from the wrapped
// ReadSeeker, feeds it to the zstd encoder, and then copies buffered
// compressed bytes into p. It returns errNotInitialized when the wrapped
// ReadSeeker has not been initialized.
func (cfs *compressedSeeker) Read(p []byte) (int, error) {
	if !cfs.IsInitialized() {
		return 0, errNotInitialized
	}

	var err error
	// Repeatedly encode chunks of input data until there's enough compressed
	// data to fill the output buffer. It can't be known ahead of time how much
	// uncompressed data will correspond to the desired amount of output
	// compressed data, hence the need for a loop.
	//
	// err will be nil until the loop encounters an error. cfs.encdW will be nil
	// when entering the loop if a previous Read call encountered an error or
	// reached an EOF, in which case there's no more data to encode.
	for cfs.buf.Len() < len(p) && err == nil && cfs.encdW != nil {
		var n int
		// Read is allowed to use the entirety of p as a scratchpad.
		n, err = cfs.fs.Read(p)
		// errW must be non-nil if written bytes != n.
		_, errW := cfs.encdW.Write(p[:n])
		// A write error only replaces a nil or EOF read error; any other read
		// error is kept.
		if errW != nil && (err == nil || err == io.EOF) {
			err = errW
		}
	}

	if err != nil {
		// When the buffer ends (EOF), or in case of an unexpected error,
		// compress remaining available bytes. The encoder requires a Close call
		// to finish writing compressed data smaller than zstd's window size.
		closeErr := cfs.encdW.Close()
		// EOF from the source is not reported directly: it is replaced by the
		// result of closing the encoder (nil on success).
		if err == io.EOF {
			err = closeErr
		}
		// Hand the encoder back to the pool; a nil encdW makes later Read
		// calls skip the encode loop and only drain the buffer.
		encoders.Put(cfs.encdW)
		cfs.encdW = nil
	}

	// Serve whatever compressed data is buffered. A pending error from above
	// takes precedence over the buffer's own read error.
	n, readErr := cfs.buf.Read(p)
	if err == nil {
		err = readErr
	}
	return n, err
}
   251  
   252  func (cfs *compressedSeeker) SeekOffset(offset int64) error {
   253  	cfs.buf.Reset()
   254  	if cfs.encdW == nil {
   255  		encdIntf := encoders.Get()
   256  		var ok bool
   257  		cfs.encdW, ok = encdIntf.(*zstd.Encoder)
   258  		if !ok || cfs.encdW == nil {
   259  			return errors.New("failed to get a new encoder")
   260  		}
   261  	} else if err := cfs.encdW.Close(); err != nil {
   262  		encoders.Put(cfs.encdW)
   263  		cfs.encdW = nil
   264  		return err
   265  	}
   266  
   267  	cfs.buf.Reset()
   268  	cfs.encdW.Reset(cfs.buf)
   269  	return cfs.fs.SeekOffset(offset)
   270  }
   271  
   272  func (cfs *compressedSeeker) IsInitialized() bool { return cfs.fs.IsInitialized() }
   273  func (cfs *compressedSeeker) Initialize() error   { return cfs.fs.Initialize() }
   274  
   275  // No need for close to close the encoder - that's handled by Read.
   276  func (cfs *compressedSeeker) Close() error { return cfs.fs.Close() }