github.com/pachyderm/pachyderm@v1.13.4/src/server/pkg/storage/fileset/index/reader.go (about) 1 package index 2 3 import ( 4 "bytes" 5 "context" 6 "io" 7 8 "github.com/pachyderm/pachyderm/src/client/pkg/errors" 9 "github.com/pachyderm/pachyderm/src/client/pkg/pbutil" 10 "github.com/pachyderm/pachyderm/src/server/pkg/storage/chunk" 11 "modernc.org/mathutil" 12 ) 13 14 // Reader is used for reading a multilevel index. 15 type Reader struct { 16 chunks *chunk.Storage 17 filter *pathFilter 18 topIdx *Index 19 } 20 21 type pathFilter struct { 22 pathRange *PathRange 23 prefix string 24 } 25 26 // NewReader create a new Reader. 27 func NewReader(chunks *chunk.Storage, topIdx *Index, opts ...Option) *Reader { 28 r := &Reader{ 29 chunks: chunks, 30 topIdx: topIdx, 31 } 32 for _, opt := range opts { 33 opt(r) 34 } 35 return r 36 } 37 38 // Iterate iterates over the indexes. 39 func (r *Reader) Iterate(ctx context.Context, cb func(*Index) error) error { 40 if r.topIdx == nil { 41 return nil 42 } 43 // Setup top level reader. 44 pbr := r.topLevel() 45 levels := []pbutil.Reader{pbr} 46 for { 47 pbr := levels[len(levels)-1] 48 idx := &Index{} 49 if err := pbr.Read(idx); err != nil { 50 if errors.Is(err, io.EOF) { 51 return nil 52 } 53 return err 54 } 55 // Return if done. 56 if r.atEnd(idx.Path) { 57 return nil 58 } 59 // Handle lowest level index. 60 if idx.Range == nil { 61 // Skip to the starting index. 62 if !r.atStart(idx.Path) { 63 continue 64 } 65 resolveParts(idx) 66 if err := cb(idx); err != nil { 67 return err 68 } 69 continue 70 } 71 // Skip to the starting index. 72 if !r.atStart(idx.Range.LastPath) { 73 continue 74 } 75 levels = append(levels, pbutil.NewReader(newLevelReader(ctx, pbr, r.chunks, idx))) 76 } 77 } 78 79 func (r *Reader) topLevel() pbutil.Reader { 80 buf := bytes.Buffer{} 81 pbw := pbutil.NewWriter(&buf) 82 pbw.Write(r.topIdx) 83 return pbutil.NewReader(&buf) 84 } 85 86 // atStart returns true when the name is in the valid range for a filter (always true if no filter is set). 87 // For a range filter, this means the name is >= to the lower bound. 88 // For a prefix filter, this means the name is >= to the prefix. 89 func (r *Reader) atStart(name string) bool { 90 if r.filter == nil { 91 return true 92 } 93 if r.filter.pathRange != nil && r.filter.pathRange.Lower != "" { 94 return name >= r.filter.pathRange.Lower 95 } 96 return name >= r.filter.prefix 97 } 98 99 // atEnd returns true when the name is past the valid range for a filter (always false if no filter is set). 100 // For a range filter, this means the name is > than the upper bound. 101 // For a prefix filter, this means the name does not have the prefix and a name with the prefix cannot show up after it. 102 func (r *Reader) atEnd(name string) bool { 103 if r.filter == nil { 104 return false 105 } 106 if r.filter.pathRange != nil && r.filter.pathRange.Upper != "" { 107 return name > r.filter.pathRange.Upper 108 } 109 // Name is past a prefix when the first len(prefix) bytes are greater than the prefix 110 // (use len(name) bytes for comparison when len(name) < len(prefix)). 111 // A simple greater than check would not suffice here for the prefix filter functionality 112 // (for example, if the index consisted of the paths "a", "ab", "abc", and "b", then a 113 // reader with the prefix filter set to "a" would end at the "ab" path rather than the "b" path). 114 cmpSize := mathutil.Min(len(name), len(r.filter.prefix)) 115 return name[:cmpSize] > r.filter.prefix[:cmpSize] 116 } 117 118 type levelReader struct { 119 ctx context.Context 120 parent pbutil.Reader 121 chunks *chunk.Storage 122 idx *Index 123 buf *bytes.Buffer 124 } 125 126 func newLevelReader(ctx context.Context, parent pbutil.Reader, chunks *chunk.Storage, idx *Index) *levelReader { 127 return &levelReader{ 128 ctx: ctx, 129 parent: parent, 130 chunks: chunks, 131 idx: idx, 132 } 133 } 134 135 // Read reads data from an index level. 136 func (lr *levelReader) Read(data []byte) (int, error) { 137 if err := lr.setup(); err != nil { 138 return 0, err 139 } 140 var bytesRead int 141 for len(data) > 0 { 142 if lr.buf.Len() == 0 { 143 if err := lr.next(); err != nil { 144 return bytesRead, err 145 } 146 } 147 n, _ := lr.buf.Read(data) 148 bytesRead += n 149 data = data[n:] 150 } 151 return bytesRead, nil 152 } 153 154 func (lr *levelReader) setup() error { 155 if lr.buf == nil { 156 r := lr.chunks.NewReader(lr.ctx, []*chunk.DataRef{lr.idx.Range.ChunkRef}) 157 lr.buf = &bytes.Buffer{} 158 if err := r.Get(lr.buf); err != nil { 159 return err 160 } 161 // Skip offset bytes to get to first index entry in chunk. 162 lr.buf = bytes.NewBuffer(lr.buf.Bytes()[lr.idx.Range.Offset:]) 163 } 164 return nil 165 } 166 167 func (lr *levelReader) next() error { 168 lr.idx.Reset() 169 if err := lr.parent.Read(lr.idx); err != nil { 170 return err 171 } 172 r := lr.chunks.NewReader(lr.ctx, []*chunk.DataRef{lr.idx.Range.ChunkRef}) 173 lr.buf.Reset() 174 return r.Get(lr.buf) 175 }