github.com/pachyderm/pachyderm@v1.13.4/src/server/pkg/storage/chunk/reader.go (about)

     1  package chunk
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"io"
     7  	"sync"
     8  
     9  	"github.com/pachyderm/pachyderm/src/client/pkg/errors"
    10  	"github.com/pachyderm/pachyderm/src/server/pkg/errutil"
    11  )
    12  
    13  // Reader reads data from chunk storage.
    14  type Reader struct {
    15  	ctx      context.Context
    16  	client   *Client
    17  	dataRefs []*DataRef
    18  }
    19  
    20  func newReader(ctx context.Context, client *Client, dataRefs []*DataRef) *Reader {
    21  	return &Reader{
    22  		ctx:      ctx,
    23  		client:   client,
    24  		dataRefs: dataRefs,
    25  	}
    26  }
    27  
    28  // Iterate iterates over the data readers for the data references.
    29  func (r *Reader) Iterate(cb func(*DataReader) error) error {
    30  	var seed *DataReader
    31  	for _, dataRef := range r.dataRefs {
    32  		dr := newDataReader(r.ctx, r.client, dataRef, seed)
    33  		if err := cb(dr); err != nil {
    34  			if errors.Is(err, errutil.ErrBreak) {
    35  				return nil
    36  			}
    37  			return err
    38  		}
    39  		seed = dr
    40  	}
    41  	return nil
    42  }
    43  
    44  // Get writes the concatenation of the data referenced by the data references.
    45  func (r *Reader) Get(w io.Writer) error {
    46  	return r.Iterate(func(dr *DataReader) error {
    47  		return dr.Get(w)
    48  	})
    49  }
    50  
    51  // DataReader is an abstraction that lazily reads data referenced by a data reference.
    52  // The seed is set to avoid re-downloading a chunk that is shared between this data reference
    53  // and the prior in a chain of data references.
    54  // TODO: Probably don't need seed with caching.
    55  type DataReader struct {
    56  	ctx        context.Context
    57  	client     *Client
    58  	dataRef    *DataRef
    59  	seed       *DataReader
    60  	getChunkMu sync.Mutex
    61  	chunk      []byte
    62  }
    63  
    64  func newDataReader(ctx context.Context, client *Client, dataRef *DataRef, seed *DataReader) *DataReader {
    65  	return &DataReader{
    66  		ctx:     ctx,
    67  		client:  client,
    68  		dataRef: dataRef,
    69  		seed:    seed,
    70  	}
    71  }
    72  
    73  // DataRef returns the data reference associated with this data reader.
    74  func (dr *DataReader) DataRef() *DataRef {
    75  	return dr.dataRef
    76  }
    77  
    78  // Get writes the data referenced by the data reference.
    79  func (dr *DataReader) Get(w io.Writer) error {
    80  	if err := dr.getChunk(); err != nil {
    81  		return err
    82  	}
    83  	data := dr.chunk[dr.dataRef.OffsetBytes : dr.dataRef.OffsetBytes+dr.dataRef.SizeBytes]
    84  	_, err := w.Write(data)
    85  	return err
    86  }
    87  
    88  func (dr *DataReader) getChunk() error {
    89  	dr.getChunkMu.Lock()
    90  	defer dr.getChunkMu.Unlock()
    91  	if dr.chunk != nil {
    92  		return nil
    93  	}
    94  	// Use seed chunk if possible.
    95  	if dr.seed != nil && bytes.Equal(dr.dataRef.Ref.Id, dr.seed.dataRef.Ref.Id) {
    96  		if err := dr.seed.getChunk(); err != nil {
    97  			return err
    98  		}
    99  		dr.chunk = dr.seed.chunk
   100  		return nil
   101  	}
   102  	// Get chunk from object storage.
   103  	buf := &bytes.Buffer{}
   104  	chunkID := dr.dataRef.Ref.Id
   105  	if err := dr.client.Get(dr.ctx, chunkID, buf); err != nil {
   106  		return err
   107  	}
   108  	dr.chunk = buf.Bytes()
   109  	return nil
   110  }