github.com/grafana/pyroscope@v1.18.0/pkg/objstore/parquet/reader.go (about)

     1  package parquet
     2  
import (
	"context"
	"errors"
	"fmt"
	"io"
	"sync"

	phlareobjstore "github.com/grafana/pyroscope/pkg/objstore"
	"github.com/grafana/pyroscope/pkg/phlaredb/block"
)
    12  
    13  // bufferPool is a pool of bytes.Buffers.
    14  var bufferPool = sync.Pool{
    15  	New: func() interface{} {
    16  		buf := make([]byte, 0, 32*1024)
    17  		return &buf
    18  	},
    19  }
    20  
    21  type optimizedReaderAt struct {
    22  	phlareobjstore.ReaderAtCloser
    23  	meta block.File
    24  
    25  	footerCache *[]byte
    26  	footerLock  sync.RWMutex
    27  	footerLen   uint64
    28  }
    29  
    30  // NewOptimizedReader returns a reader that optimizes the reading of the parquet file.
    31  func NewOptimizedReader(r phlareobjstore.ReaderAtCloser, meta block.File) phlareobjstore.ReaderAtCloser {
    32  	var footerLen uint64
    33  
    34  	// as long as we don't keep the exact footer sizes in the meta estimate it
    35  	if meta.SizeBytes > 0 {
    36  		footerLen = meta.SizeBytes / uint64(10000)
    37  	}
    38  
    39  	// set a minimum footer size of 32KiB
    40  	if footerLen < 32*1024 {
    41  		footerLen = 32 * 1024
    42  	}
    43  
    44  	// set a maximum footer size of 512KiB
    45  	if footerLen > 512*1024 {
    46  		footerLen = 512 * 1024
    47  	}
    48  
    49  	// now check clamp it to the actual size of the whole object
    50  	if footerLen > meta.SizeBytes {
    51  		footerLen = meta.SizeBytes
    52  	}
    53  
    54  	return &optimizedReaderAt{
    55  		ReaderAtCloser: r,
    56  		meta:           meta,
    57  		footerLen:      footerLen,
    58  	}
    59  }
    60  
    61  // // called by parquet-go in OpenFile() to set offset and length of footer section
    62  // func (r *optimizedReaderAt) SetFooterSection(offset, length int64) {
    63  // 	// todo cache footer section
    64  // }
    65  
    66  // // called by parquet-go in OpenFile() to set offset and length of column indexes
    67  // func (r *optimizedReaderAt) SetColumnIndexSection(offset, length int64) {
    68  // 	// todo cache column index section
    69  // }
    70  
    71  // // called by parquet-go in OpenFile() to set offset and length of offset index section
    72  // func (r *optimizedReaderAt) SetOffsetIndexSection(offset, length int64) {
    73  // 	// todo cache offset index section
    74  // }
    75  
// magic is the 4-byte value ReadAt returns for a 4-byte read at offset 0,
// without consulting the underlying reader.
const magic = "PAR1"
    77  
    78  // note cache needs to be held to call this method
    79  func (r *optimizedReaderAt) serveFromCache(p []byte, off int64) (int, error) {
    80  	if r.footerCache == nil {
    81  		return 0, errors.New("footerCache is nil")
    82  	}
    83  	// recalculate offset to start at the cache
    84  	off = off - int64(r.meta.SizeBytes) + int64(r.footerLen)
    85  	return copy(p, (*r.footerCache)[int(off):int(off)+len(p)]), nil
    86  }
    87  
    88  func (r *optimizedReaderAt) clearFooterCache() {
    89  	r.footerLock.Lock()
    90  	defer r.footerLock.Unlock()
    91  	if r.footerCache != nil {
    92  		bufferPool.Put(r.footerCache)
    93  		r.footerCache = nil
    94  	}
    95  
    96  }
    97  
    98  func (r *optimizedReaderAt) Close() (err error) {
    99  	r.clearFooterCache()
   100  	return r.ReaderAtCloser.Close()
   101  }
   102  
   103  func (r *optimizedReaderAt) ReadAt(p []byte, off int64) (int, error) {
   104  	// handle magic header
   105  	if len(p) == 4 && off == 0 {
   106  		return copy(p, []byte(magic)), nil
   107  	}
   108  
   109  	// check if the call falls into the footer
   110  	if off >= int64(r.meta.SizeBytes)-int64(r.footerLen) {
   111  		// check if the cache exists
   112  		r.footerLock.RLock()
   113  		cacheExists := r.footerCache != nil && len(*r.footerCache) == int(r.footerLen)
   114  		if cacheExists {
   115  			defer r.footerLock.RUnlock()
   116  			return r.serveFromCache(p, off)
   117  		}
   118  		r.footerLock.RUnlock()
   119  
   120  		// no valid cache found, create one under write lock
   121  		r.footerLock.Lock()
   122  		defer r.footerLock.Unlock()
   123  
   124  		// check again if cache has been populated in the meantime
   125  		cacheExists = r.footerCache != nil && len(*r.footerCache) == int(r.footerLen)
   126  		if cacheExists {
   127  			return r.serveFromCache(p, off)
   128  		}
   129  
   130  		// populate cache
   131  		if r.footerCache == nil {
   132  			r.footerCache = bufferPool.Get().(*[]byte)
   133  		}
   134  		if cap(*r.footerCache) < int(r.footerLen) {
   135  			// grow the buffer if it is too small
   136  			buf := make([]byte, int(r.footerLen))
   137  			r.footerCache = &buf
   138  		} else {
   139  			// reuse the buffer if it is big enough
   140  			*r.footerCache = (*r.footerCache)[:r.footerLen]
   141  		}
   142  
   143  		if n, err := r.ReaderAtCloser.ReadAt(*r.footerCache, int64(r.meta.SizeBytes)-int64(r.footerLen)); err != nil {
   144  			// return to pool
   145  			bufferPool.Put(r.footerCache)
   146  			r.footerCache = nil
   147  			return 0, err
   148  		} else if n != int(r.footerLen) { // check if we got the expected amount of bytes
   149  			// return to pool
   150  			bufferPool.Put(r.footerCache)
   151  			r.footerCache = nil
   152  			return 0, fmt.Errorf("unexpected read length, expected=%d actual=%d", r.footerLen, n)
   153  		}
   154  
   155  		return r.serveFromCache(p, off)
   156  	}
   157  
   158  	// anything else will just read through the optimizer
   159  	return r.ReaderAtCloser.ReadAt(p, off)
   160  }
   161  
   162  // OptimizedBucketReaderAt uses a bucket reader and wraps the optimized reader. Must not be used with non-parquet files.
   163  func OptimizedBucketReaderAt(bucketReader phlareobjstore.BucketReader, ctx context.Context, meta block.File) (phlareobjstore.ReaderAtCloser, error) {
   164  	rc, err := bucketReader.ReaderAt(ctx, meta.RelPath)
   165  	if err != nil {
   166  		return nil, err
   167  	}
   168  	return NewOptimizedReader(rc, meta), nil
   169  }