github.com/vc42/parquet-go@v0.0.0-20240320194221-1a9adb5f23f5/page_buffer.go (about)

     1  package parquet
     2  
     3  import (
     4  	"bytes"
     5  	"io"
     6  	"os"
     7  	"path/filepath"
     8  	"sync"
     9  )
    10  
    11  // PageBufferPool is an interface abstracting the underlying implementation of
    12  // page buffer pools.
    13  //
    14  // The parquet-go package provides two implementations of this interface, one
    15  // backed by in-memory buffers (on the Go heap), and the other using temporary
    16  // files on disk.
    17  //
    18  // Applications which need finer grain control over the allocation and retention
    19  // of page buffers may choose to provide their own implementation and install it
    20  // via the parquet.ColumnPageBuffers writer option.
    21  //
    22  // PageBufferPool implementations must be safe to use concurrently from multiple
    23  // goroutines.
    24  type PageBufferPool interface {
    25  	// GetPageBuffer is called when a parquet writer needs to acquires a new
    26  	// page buffer from the pool.
    27  	GetPageBuffer() io.ReadWriter
    28  
    29  	// PutPageBuffer is called when a parquet writer releases a page buffer to
    30  	// the pool.
    31  	//
    32  	// The parquet.Writer type guarantees that the buffers it calls this method
    33  	// with were previously acquired by a call to GetPageBuffer on the same
    34  	// pool, and that it will not use them anymore after the call.
    35  	PutPageBuffer(io.ReadWriter)
    36  }
    37  
    38  // NewPageBufferPool creates a new in-memory page buffer pool.
    39  //
    40  // The implementation is backed by sync.Pool and allocates memory buffers on the
    41  // Go heap.
    42  func NewPageBufferPool() PageBufferPool { return new(pageBufferPool) }
    43  
    44  type pageBufferPool struct{ sync.Pool }
    45  
    46  func (pool *pageBufferPool) GetPageBuffer() io.ReadWriter {
    47  	b, _ := pool.Get().(*bytes.Buffer)
    48  	if b == nil {
    49  		b = new(bytes.Buffer)
    50  	} else {
    51  		b.Reset()
    52  	}
    53  	return b
    54  }
    55  
    56  func (pool *pageBufferPool) PutPageBuffer(buf io.ReadWriter) {
    57  	if b, _ := buf.(*bytes.Buffer); b != nil {
    58  		pool.Put(b)
    59  	}
    60  }
    61  
    62  type fileBufferPool struct {
    63  	err     error
    64  	tempdir string
    65  	pattern string
    66  }
    67  
    68  // NewFileBufferPool creates a new on-disk page buffer pool.
    69  func NewFileBufferPool(tempdir, pattern string) PageBufferPool {
    70  	pool := &fileBufferPool{
    71  		tempdir: tempdir,
    72  		pattern: pattern,
    73  	}
    74  	pool.tempdir, pool.err = filepath.Abs(pool.tempdir)
    75  	return pool
    76  }
    77  
    78  func (pool *fileBufferPool) GetPageBuffer() io.ReadWriter {
    79  	if pool.err != nil {
    80  		return &errorBuffer{err: pool.err}
    81  	}
    82  	f, err := os.CreateTemp(pool.tempdir, pool.pattern)
    83  	if err != nil {
    84  		return &errorBuffer{err: err}
    85  	}
    86  	return &fileBuffer{file: f}
    87  }
    88  
    89  func (pool *fileBufferPool) PutPageBuffer(buf io.ReadWriter) {
    90  	if f, _ := buf.(*fileBuffer); f != nil {
    91  		defer f.file.Close()
    92  		os.Remove(f.file.Name())
    93  	}
    94  }
    95  
    96  type fileBuffer struct {
    97  	file *os.File
    98  	seek int64
    99  }
   100  
   101  func (buf *fileBuffer) Read(b []byte) (int, error) {
   102  	// The *os.File tracks a single cursor which we use for write operations to
   103  	// support appending to the buffer. We need a second cursor for reads which
   104  	// is tracked by the buf.seek field, using ReadAt to read from the file at
   105  	// the current read position.
   106  	n, err := buf.file.ReadAt(b, buf.seek)
   107  	buf.seek += int64(n)
   108  	return n, err
   109  }
   110  
   111  func (buf *fileBuffer) ReadFrom(r io.Reader) (int64, error) {
   112  	return buf.file.ReadFrom(r)
   113  }
   114  
   115  func (buf *fileBuffer) Write(b []byte) (int, error) {
   116  	return buf.file.Write(b)
   117  }
   118  
   119  func (buf *fileBuffer) WriteString(s string) (int, error) {
   120  	return buf.file.WriteString(s)
   121  }
   122  
   123  type errorBuffer struct{ err error }
   124  
   125  func (buf *errorBuffer) Read([]byte) (int, error)          { return 0, buf.err }
   126  func (buf *errorBuffer) Write([]byte) (int, error)         { return 0, buf.err }
   127  func (buf *errorBuffer) WriteString(string) (int, error)   { return 0, buf.err }
   128  func (buf *errorBuffer) ReadFrom(io.Reader) (int64, error) { return 0, buf.err }
   129  func (buf *errorBuffer) WriteTo(io.Writer) (int64, error)  { return 0, buf.err }
   130  
   131  var (
   132  	defaultPageBufferPool pageBufferPool
   133  
   134  	_ io.ReaderFrom   = (*fileBuffer)(nil)
   135  	_ io.StringWriter = (*fileBuffer)(nil)
   136  
   137  	_ io.ReaderFrom   = (*errorBuffer)(nil)
   138  	_ io.WriterTo     = (*errorBuffer)(nil)
   139  	_ io.StringWriter = (*errorBuffer)(nil)
   140  )