github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/buffer_pool.go (about)

     1  package parquet
     2  
     3  import (
     4  	"fmt"
     5  	"io"
     6  	"os"
     7  	"path/filepath"
     8  	"sync"
     9  )
    10  
// BufferPool is an interface abstracting the underlying implementation of
// page buffer pools.
//
// The parquet-go package provides two implementations of this interface, one
// backed by in-memory buffers (on the Go heap, see NewBufferPool), and the
// other using temporary files on disk (see NewFileBufferPool).
//
// Applications which need finer grain control over the allocation and retention
// of page buffers may choose to provide their own implementation and install it
// via the parquet.ColumnPageBuffers writer option.
//
// BufferPool implementations must be safe to use concurrently from multiple
// goroutines.
type BufferPool interface {
	// GetBuffer is called when a parquet writer needs to acquire a new
	// page buffer from the pool.
	//
	// The method has no error return. The file-backed implementation in this
	// package reports a failure to create a buffer by returning a buffer whose
	// operations all fail with that error.
	GetBuffer() io.ReadWriteSeeker

	// PutBuffer is called when a parquet writer releases a page buffer to
	// the pool.
	//
	// The parquet.Writer type guarantees that the buffers it calls this method
	// with were previously acquired by a call to GetBuffer on the same
	// pool, and that it will not use them anymore after the call.
	PutBuffer(io.ReadWriteSeeker)
}
    37  
    38  // NewBufferPool creates a new in-memory page buffer pool.
    39  //
    40  // The implementation is backed by sync.Pool and allocates memory buffers on the
    41  // Go heap.
    42  func NewBufferPool() BufferPool { return new(memoryBufferPool) }
    43  
    44  type memoryBuffer struct {
    45  	data []byte
    46  	off  int
    47  }
    48  
    49  func (p *memoryBuffer) Reset() {
    50  	p.data, p.off = p.data[:0], 0
    51  }
    52  
    53  func (p *memoryBuffer) Read(b []byte) (n int, err error) {
    54  	n = copy(b, p.data[p.off:])
    55  	p.off += n
    56  	if p.off == len(p.data) {
    57  		err = io.EOF
    58  	}
    59  	return n, err
    60  }
    61  
    62  func (p *memoryBuffer) Write(b []byte) (int, error) {
    63  	n := copy(p.data[p.off:cap(p.data)], b)
    64  	p.data = p.data[:p.off+n]
    65  
    66  	if n < len(b) {
    67  		p.data = append(p.data, b[n:]...)
    68  	}
    69  
    70  	p.off += len(b)
    71  	return len(b), nil
    72  }
    73  
    74  func (p *memoryBuffer) WriteTo(w io.Writer) (int64, error) {
    75  	n, err := w.Write(p.data[p.off:])
    76  	p.off += n
    77  	return int64(n), err
    78  }
    79  
    80  func (p *memoryBuffer) Seek(offset int64, whence int) (int64, error) {
    81  	switch whence {
    82  	case io.SeekCurrent:
    83  		offset += int64(p.off)
    84  	case io.SeekEnd:
    85  		offset += int64(len(p.data))
    86  	}
    87  	if offset < 0 {
    88  		return 0, fmt.Errorf("seek: negative offset: %d<0", offset)
    89  	}
    90  	if offset > int64(len(p.data)) {
    91  		offset = int64(len(p.data))
    92  	}
    93  	p.off = int(offset)
    94  	return offset, nil
    95  }
    96  
    97  type memoryBufferPool struct{ sync.Pool }
    98  
    99  func (pool *memoryBufferPool) GetBuffer() io.ReadWriteSeeker {
   100  	b, _ := pool.Get().(*memoryBuffer)
   101  	if b == nil {
   102  		b = new(memoryBuffer)
   103  	} else {
   104  		b.Reset()
   105  	}
   106  	return b
   107  }
   108  
   109  func (pool *memoryBufferPool) PutBuffer(buf io.ReadWriteSeeker) {
   110  	if b, _ := buf.(*memoryBuffer); b != nil {
   111  		pool.Put(b)
   112  	}
   113  }
   114  
   115  type fileBufferPool struct {
   116  	err     error
   117  	tempdir string
   118  	pattern string
   119  }
   120  
   121  // NewFileBufferPool creates a new on-disk page buffer pool.
   122  func NewFileBufferPool(tempdir, pattern string) BufferPool {
   123  	pool := &fileBufferPool{
   124  		tempdir: tempdir,
   125  		pattern: pattern,
   126  	}
   127  	pool.tempdir, pool.err = filepath.Abs(pool.tempdir)
   128  	return pool
   129  }
   130  
   131  func (pool *fileBufferPool) GetBuffer() io.ReadWriteSeeker {
   132  	if pool.err != nil {
   133  		return &errorBuffer{err: pool.err}
   134  	}
   135  	f, err := os.CreateTemp(pool.tempdir, pool.pattern)
   136  	if err != nil {
   137  		return &errorBuffer{err: err}
   138  	}
   139  	return f
   140  }
   141  
   142  func (pool *fileBufferPool) PutBuffer(buf io.ReadWriteSeeker) {
   143  	if f, _ := buf.(*os.File); f != nil {
   144  		defer f.Close()
   145  		os.Remove(f.Name())
   146  	}
   147  }
   148  
   149  type errorBuffer struct{ err error }
   150  
   151  func (buf *errorBuffer) Read([]byte) (int, error)          { return 0, buf.err }
   152  func (buf *errorBuffer) Write([]byte) (int, error)         { return 0, buf.err }
   153  func (buf *errorBuffer) ReadFrom(io.Reader) (int64, error) { return 0, buf.err }
   154  func (buf *errorBuffer) WriteTo(io.Writer) (int64, error)  { return 0, buf.err }
   155  func (buf *errorBuffer) Seek(int64, int) (int64, error)    { return 0, buf.err }
   156  
var (
	// Package-level in-memory pools. The zero value of memoryBufferPool is
	// usable as is.
	// NOTE(review): presumably the defaults for column page buffers and for
	// sorting buffers respectively; their users are not visible in this file.
	defaultColumnBufferPool  memoryBufferPool
	defaultSortingBufferPool memoryBufferPool

	// Compile-time checks that the buffer types implement the optional io
	// interfaces they provide methods for.
	_ io.ReaderFrom = (*errorBuffer)(nil)
	_ io.WriterTo   = (*errorBuffer)(nil)
	_ io.WriterTo   = (*memoryBuffer)(nil)
)
   165  
   166  type readerAt struct {
   167  	reader io.ReadSeeker
   168  	offset int64
   169  }
   170  
   171  func (r *readerAt) ReadAt(b []byte, off int64) (int, error) {
   172  	if r.offset < 0 || off != r.offset {
   173  		off, err := r.reader.Seek(off, io.SeekStart)
   174  		if err != nil {
   175  			return 0, err
   176  		}
   177  		r.offset = off
   178  	}
   179  	n, err := r.reader.Read(b)
   180  	r.offset += int64(n)
   181  	return n, err
   182  }
   183  
   184  func newReaderAt(r io.ReadSeeker) io.ReaderAt {
   185  	if rr, ok := r.(io.ReaderAt); ok {
   186  		return rr
   187  	}
   188  	return &readerAt{reader: r, offset: -1}
   189  }