github.com/pachyderm/pachyderm@v1.13.4/src/server/pkg/storage/chunk/reader.go (about) 1 package chunk 2 3 import ( 4 "bytes" 5 "context" 6 "io" 7 "sync" 8 9 "github.com/pachyderm/pachyderm/src/client/pkg/errors" 10 "github.com/pachyderm/pachyderm/src/server/pkg/errutil" 11 ) 12 13 // Reader reads data from chunk storage. 14 type Reader struct { 15 ctx context.Context 16 client *Client 17 dataRefs []*DataRef 18 } 19 20 func newReader(ctx context.Context, client *Client, dataRefs []*DataRef) *Reader { 21 return &Reader{ 22 ctx: ctx, 23 client: client, 24 dataRefs: dataRefs, 25 } 26 } 27 28 // Iterate iterates over the data readers for the data references. 29 func (r *Reader) Iterate(cb func(*DataReader) error) error { 30 var seed *DataReader 31 for _, dataRef := range r.dataRefs { 32 dr := newDataReader(r.ctx, r.client, dataRef, seed) 33 if err := cb(dr); err != nil { 34 if errors.Is(err, errutil.ErrBreak) { 35 return nil 36 } 37 return err 38 } 39 seed = dr 40 } 41 return nil 42 } 43 44 // Get writes the concatenation of the data referenced by the data references. 45 func (r *Reader) Get(w io.Writer) error { 46 return r.Iterate(func(dr *DataReader) error { 47 return dr.Get(w) 48 }) 49 } 50 51 // DataReader is an abstraction that lazily reads data referenced by a data reference. 52 // The seed is set to avoid re-downloading a chunk that is shared between this data reference 53 // and the prior in a chain of data references. 54 // TODO: Probably don't need seed with caching. 55 type DataReader struct { 56 ctx context.Context 57 client *Client 58 dataRef *DataRef 59 seed *DataReader 60 getChunkMu sync.Mutex 61 chunk []byte 62 } 63 64 func newDataReader(ctx context.Context, client *Client, dataRef *DataRef, seed *DataReader) *DataReader { 65 return &DataReader{ 66 ctx: ctx, 67 client: client, 68 dataRef: dataRef, 69 seed: seed, 70 } 71 } 72 73 // DataRef returns the data reference associated with this data reader. 74 func (dr *DataReader) DataRef() *DataRef { 75 return dr.dataRef 76 } 77 78 // Get writes the data referenced by the data reference. 79 func (dr *DataReader) Get(w io.Writer) error { 80 if err := dr.getChunk(); err != nil { 81 return err 82 } 83 data := dr.chunk[dr.dataRef.OffsetBytes : dr.dataRef.OffsetBytes+dr.dataRef.SizeBytes] 84 _, err := w.Write(data) 85 return err 86 } 87 88 func (dr *DataReader) getChunk() error { 89 dr.getChunkMu.Lock() 90 defer dr.getChunkMu.Unlock() 91 if dr.chunk != nil { 92 return nil 93 } 94 // Use seed chunk if possible. 95 if dr.seed != nil && bytes.Equal(dr.dataRef.Ref.Id, dr.seed.dataRef.Ref.Id) { 96 if err := dr.seed.getChunk(); err != nil { 97 return err 98 } 99 dr.chunk = dr.seed.chunk 100 return nil 101 } 102 // Get chunk from object storage. 103 buf := &bytes.Buffer{} 104 chunkID := dr.dataRef.Ref.Id 105 if err := dr.client.Get(dr.ctx, chunkID, buf); err != nil { 106 return err 107 } 108 dr.chunk = buf.Bytes() 109 return nil 110 }