github.com/vc42/parquet-go@v0.0.0-20240320194221-1a9adb5f23f5/page_buffer.go (about) 1 package parquet 2 3 import ( 4 "bytes" 5 "io" 6 "os" 7 "path/filepath" 8 "sync" 9 ) 10 11 // PageBufferPool is an interface abstracting the underlying implementation of 12 // page buffer pools. 13 // 14 // The parquet-go package provides two implementations of this interface, one 15 // backed by in-memory buffers (on the Go heap), and the other using temporary 16 // files on disk. 17 // 18 // Applications which need finer grain control over the allocation and retention 19 // of page buffers may choose to provide their own implementation and install it 20 // via the parquet.ColumnPageBuffers writer option. 21 // 22 // PageBufferPool implementations must be safe to use concurrently from multiple 23 // goroutines. 24 type PageBufferPool interface { 25 // GetPageBuffer is called when a parquet writer needs to acquires a new 26 // page buffer from the pool. 27 GetPageBuffer() io.ReadWriter 28 29 // PutPageBuffer is called when a parquet writer releases a page buffer to 30 // the pool. 31 // 32 // The parquet.Writer type guarantees that the buffers it calls this method 33 // with were previously acquired by a call to GetPageBuffer on the same 34 // pool, and that it will not use them anymore after the call. 35 PutPageBuffer(io.ReadWriter) 36 } 37 38 // NewPageBufferPool creates a new in-memory page buffer pool. 39 // 40 // The implementation is backed by sync.Pool and allocates memory buffers on the 41 // Go heap. 42 func NewPageBufferPool() PageBufferPool { return new(pageBufferPool) } 43 44 type pageBufferPool struct{ sync.Pool } 45 46 func (pool *pageBufferPool) GetPageBuffer() io.ReadWriter { 47 b, _ := pool.Get().(*bytes.Buffer) 48 if b == nil { 49 b = new(bytes.Buffer) 50 } else { 51 b.Reset() 52 } 53 return b 54 } 55 56 func (pool *pageBufferPool) PutPageBuffer(buf io.ReadWriter) { 57 if b, _ := buf.(*bytes.Buffer); b != nil { 58 pool.Put(b) 59 } 60 } 61 62 type fileBufferPool struct { 63 err error 64 tempdir string 65 pattern string 66 } 67 68 // NewFileBufferPool creates a new on-disk page buffer pool. 69 func NewFileBufferPool(tempdir, pattern string) PageBufferPool { 70 pool := &fileBufferPool{ 71 tempdir: tempdir, 72 pattern: pattern, 73 } 74 pool.tempdir, pool.err = filepath.Abs(pool.tempdir) 75 return pool 76 } 77 78 func (pool *fileBufferPool) GetPageBuffer() io.ReadWriter { 79 if pool.err != nil { 80 return &errorBuffer{err: pool.err} 81 } 82 f, err := os.CreateTemp(pool.tempdir, pool.pattern) 83 if err != nil { 84 return &errorBuffer{err: err} 85 } 86 return &fileBuffer{file: f} 87 } 88 89 func (pool *fileBufferPool) PutPageBuffer(buf io.ReadWriter) { 90 if f, _ := buf.(*fileBuffer); f != nil { 91 defer f.file.Close() 92 os.Remove(f.file.Name()) 93 } 94 } 95 96 type fileBuffer struct { 97 file *os.File 98 seek int64 99 } 100 101 func (buf *fileBuffer) Read(b []byte) (int, error) { 102 // The *os.File tracks a single cursor which we use for write operations to 103 // support appending to the buffer. We need a second cursor for reads which 104 // is tracked by the buf.seek field, using ReadAt to read from the file at 105 // the current read position. 106 n, err := buf.file.ReadAt(b, buf.seek) 107 buf.seek += int64(n) 108 return n, err 109 } 110 111 func (buf *fileBuffer) ReadFrom(r io.Reader) (int64, error) { 112 return buf.file.ReadFrom(r) 113 } 114 115 func (buf *fileBuffer) Write(b []byte) (int, error) { 116 return buf.file.Write(b) 117 } 118 119 func (buf *fileBuffer) WriteString(s string) (int, error) { 120 return buf.file.WriteString(s) 121 } 122 123 type errorBuffer struct{ err error } 124 125 func (buf *errorBuffer) Read([]byte) (int, error) { return 0, buf.err } 126 func (buf *errorBuffer) Write([]byte) (int, error) { return 0, buf.err } 127 func (buf *errorBuffer) WriteString(string) (int, error) { return 0, buf.err } 128 func (buf *errorBuffer) ReadFrom(io.Reader) (int64, error) { return 0, buf.err } 129 func (buf *errorBuffer) WriteTo(io.Writer) (int64, error) { return 0, buf.err } 130 131 var ( 132 defaultPageBufferPool pageBufferPool 133 134 _ io.ReaderFrom = (*fileBuffer)(nil) 135 _ io.StringWriter = (*fileBuffer)(nil) 136 137 _ io.ReaderFrom = (*errorBuffer)(nil) 138 _ io.WriterTo = (*errorBuffer)(nil) 139 _ io.StringWriter = (*errorBuffer)(nil) 140 )