github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/buffer_pool.go (about) 1 package parquet 2 3 import ( 4 "fmt" 5 "io" 6 "os" 7 "path/filepath" 8 "sync" 9 ) 10 11 // BufferPool is an interface abstracting the underlying implementation of 12 // page buffer pools. 13 // 14 // The parquet-go package provides two implementations of this interface, one 15 // backed by in-memory buffers (on the Go heap), and the other using temporary 16 // files on disk. 17 // 18 // Applications which need finer grain control over the allocation and retention 19 // of page buffers may choose to provide their own implementation and install it 20 // via the parquet.ColumnPageBuffers writer option. 21 // 22 // BufferPool implementations must be safe to use concurrently from multiple 23 // goroutines. 24 type BufferPool interface { 25 // GetBuffer is called when a parquet writer needs to acquire a new 26 // page buffer from the pool. 27 GetBuffer() io.ReadWriteSeeker 28 29 // PutBuffer is called when a parquet writer releases a page buffer to 30 // the pool. 31 // 32 // The parquet.Writer type guarantees that the buffers it calls this method 33 // with were previously acquired by a call to GetBuffer on the same 34 // pool, and that it will not use them anymore after the call. 35 PutBuffer(io.ReadWriteSeeker) 36 } 37 38 // NewBufferPool creates a new in-memory page buffer pool. 39 // 40 // The implementation is backed by sync.Pool and allocates memory buffers on the 41 // Go heap. 42 func NewBufferPool() BufferPool { return new(memoryBufferPool) } 43 44 type memoryBuffer struct { 45 data []byte 46 off int 47 } 48 49 func (p *memoryBuffer) Reset() { 50 p.data, p.off = p.data[:0], 0 51 } 52 53 func (p *memoryBuffer) Read(b []byte) (n int, err error) { 54 n = copy(b, p.data[p.off:]) 55 p.off += n 56 if p.off == len(p.data) { 57 err = io.EOF 58 } 59 return n, err 60 } 61 62 func (p *memoryBuffer) Write(b []byte) (int, error) { 63 n := copy(p.data[p.off:cap(p.data)], b) 64 p.data = p.data[:p.off+n] 65 66 if n < len(b) { 67 p.data = append(p.data, b[n:]...) 68 } 69 70 p.off += len(b) 71 return len(b), nil 72 } 73 74 func (p *memoryBuffer) WriteTo(w io.Writer) (int64, error) { 75 n, err := w.Write(p.data[p.off:]) 76 p.off += n 77 return int64(n), err 78 } 79 80 func (p *memoryBuffer) Seek(offset int64, whence int) (int64, error) { 81 switch whence { 82 case io.SeekCurrent: 83 offset += int64(p.off) 84 case io.SeekEnd: 85 offset += int64(len(p.data)) 86 } 87 if offset < 0 { 88 return 0, fmt.Errorf("seek: negative offset: %d<0", offset) 89 } 90 if offset > int64(len(p.data)) { 91 offset = int64(len(p.data)) 92 } 93 p.off = int(offset) 94 return offset, nil 95 } 96 97 type memoryBufferPool struct{ sync.Pool } 98 99 func (pool *memoryBufferPool) GetBuffer() io.ReadWriteSeeker { 100 b, _ := pool.Get().(*memoryBuffer) 101 if b == nil { 102 b = new(memoryBuffer) 103 } else { 104 b.Reset() 105 } 106 return b 107 } 108 109 func (pool *memoryBufferPool) PutBuffer(buf io.ReadWriteSeeker) { 110 if b, _ := buf.(*memoryBuffer); b != nil { 111 pool.Put(b) 112 } 113 } 114 115 type fileBufferPool struct { 116 err error 117 tempdir string 118 pattern string 119 } 120 121 // NewFileBufferPool creates a new on-disk page buffer pool. 122 func NewFileBufferPool(tempdir, pattern string) BufferPool { 123 pool := &fileBufferPool{ 124 tempdir: tempdir, 125 pattern: pattern, 126 } 127 pool.tempdir, pool.err = filepath.Abs(pool.tempdir) 128 return pool 129 } 130 131 func (pool *fileBufferPool) GetBuffer() io.ReadWriteSeeker { 132 if pool.err != nil { 133 return &errorBuffer{err: pool.err} 134 } 135 f, err := os.CreateTemp(pool.tempdir, pool.pattern) 136 if err != nil { 137 return &errorBuffer{err: err} 138 } 139 return f 140 } 141 142 func (pool *fileBufferPool) PutBuffer(buf io.ReadWriteSeeker) { 143 if f, _ := buf.(*os.File); f != nil { 144 defer f.Close() 145 os.Remove(f.Name()) 146 } 147 } 148 149 type errorBuffer struct{ err error } 150 151 func (buf *errorBuffer) Read([]byte) (int, error) { return 0, buf.err } 152 func (buf *errorBuffer) Write([]byte) (int, error) { return 0, buf.err } 153 func (buf *errorBuffer) ReadFrom(io.Reader) (int64, error) { return 0, buf.err } 154 func (buf *errorBuffer) WriteTo(io.Writer) (int64, error) { return 0, buf.err } 155 func (buf *errorBuffer) Seek(int64, int) (int64, error) { return 0, buf.err } 156 157 var ( 158 defaultColumnBufferPool memoryBufferPool 159 defaultSortingBufferPool memoryBufferPool 160 161 _ io.ReaderFrom = (*errorBuffer)(nil) 162 _ io.WriterTo = (*errorBuffer)(nil) 163 _ io.WriterTo = (*memoryBuffer)(nil) 164 ) 165 166 type readerAt struct { 167 reader io.ReadSeeker 168 offset int64 169 } 170 171 func (r *readerAt) ReadAt(b []byte, off int64) (int, error) { 172 if r.offset < 0 || off != r.offset { 173 off, err := r.reader.Seek(off, io.SeekStart) 174 if err != nil { 175 return 0, err 176 } 177 r.offset = off 178 } 179 n, err := r.reader.Read(b) 180 r.offset += int64(n) 181 return n, err 182 } 183 184 func newReaderAt(r io.ReadSeeker) io.ReaderAt { 185 if rr, ok := r.(io.ReaderAt); ok { 186 return rr 187 } 188 return &readerAt{reader: r, offset: -1} 189 }