github.com/pachyderm/pachyderm@v1.13.4/src/server/pkg/storage/fileset/index/reader.go (about)

     1  package index
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"io"
     7  
     8  	"github.com/pachyderm/pachyderm/src/client/pkg/errors"
     9  	"github.com/pachyderm/pachyderm/src/client/pkg/pbutil"
    10  	"github.com/pachyderm/pachyderm/src/server/pkg/storage/chunk"
    11  	"modernc.org/mathutil"
    12  )
    13  
// Reader is used for reading a multilevel index.
type Reader struct {
	// chunks is the chunk storage that index levels are fetched from.
	chunks *chunk.Storage
	// filter optionally restricts which paths are iterated; nil means no filtering.
	filter *pathFilter
	// topIdx is the top-level index that iteration starts from; Iterate is a no-op when it is nil.
	topIdx *Index
}
    20  
// pathFilter describes the subset of paths a Reader should iterate over.
// A bound of pathRange takes effect only when pathRange is non-nil and that
// bound is non-empty; otherwise the prefix is used for the corresponding check.
type pathFilter struct {
	// pathRange holds the lower and upper path bounds (an empty bound is not applied).
	pathRange *PathRange
	// prefix is the path prefix used when the relevant pathRange bound is not set.
	prefix string
}
    25  
    26  // NewReader create a new Reader.
    27  func NewReader(chunks *chunk.Storage, topIdx *Index, opts ...Option) *Reader {
    28  	r := &Reader{
    29  		chunks: chunks,
    30  		topIdx: topIdx,
    31  	}
    32  	for _, opt := range opts {
    33  		opt(r)
    34  	}
    35  	return r
    36  }
    37  
    38  // Iterate iterates over the indexes.
    39  func (r *Reader) Iterate(ctx context.Context, cb func(*Index) error) error {
    40  	if r.topIdx == nil {
    41  		return nil
    42  	}
    43  	// Setup top level reader.
    44  	pbr := r.topLevel()
    45  	levels := []pbutil.Reader{pbr}
    46  	for {
    47  		pbr := levels[len(levels)-1]
    48  		idx := &Index{}
    49  		if err := pbr.Read(idx); err != nil {
    50  			if errors.Is(err, io.EOF) {
    51  				return nil
    52  			}
    53  			return err
    54  		}
    55  		// Return if done.
    56  		if r.atEnd(idx.Path) {
    57  			return nil
    58  		}
    59  		// Handle lowest level index.
    60  		if idx.Range == nil {
    61  			// Skip to the starting index.
    62  			if !r.atStart(idx.Path) {
    63  				continue
    64  			}
    65  			resolveParts(idx)
    66  			if err := cb(idx); err != nil {
    67  				return err
    68  			}
    69  			continue
    70  		}
    71  		// Skip to the starting index.
    72  		if !r.atStart(idx.Range.LastPath) {
    73  			continue
    74  		}
    75  		levels = append(levels, pbutil.NewReader(newLevelReader(ctx, pbr, r.chunks, idx)))
    76  	}
    77  }
    78  
    79  func (r *Reader) topLevel() pbutil.Reader {
    80  	buf := bytes.Buffer{}
    81  	pbw := pbutil.NewWriter(&buf)
    82  	pbw.Write(r.topIdx)
    83  	return pbutil.NewReader(&buf)
    84  }
    85  
    86  // atStart returns true when the name is in the valid range for a filter (always true if no filter is set).
    87  // For a range filter, this means the name is >= to the lower bound.
    88  // For a prefix filter, this means the name is >= to the prefix.
    89  func (r *Reader) atStart(name string) bool {
    90  	if r.filter == nil {
    91  		return true
    92  	}
    93  	if r.filter.pathRange != nil && r.filter.pathRange.Lower != "" {
    94  		return name >= r.filter.pathRange.Lower
    95  	}
    96  	return name >= r.filter.prefix
    97  }
    98  
    99  // atEnd returns true when the name is past the valid range for a filter (always false if no filter is set).
   100  // For a range filter, this means the name is > than the upper bound.
   101  // For a prefix filter, this means the name does not have the prefix and a name with the prefix cannot show up after it.
   102  func (r *Reader) atEnd(name string) bool {
   103  	if r.filter == nil {
   104  		return false
   105  	}
   106  	if r.filter.pathRange != nil && r.filter.pathRange.Upper != "" {
   107  		return name > r.filter.pathRange.Upper
   108  	}
   109  	// Name is past a prefix when the first len(prefix) bytes are greater than the prefix
   110  	// (use len(name) bytes for comparison when len(name) < len(prefix)).
   111  	// A simple greater than check would not suffice here for the prefix filter functionality
   112  	// (for example, if the index consisted of the paths "a", "ab", "abc", and "b", then a
   113  	// reader with the prefix filter set to "a" would end at the "ab" path rather than the "b" path).
   114  	cmpSize := mathutil.Min(len(name), len(r.filter.prefix))
   115  	return name[:cmpSize] > r.filter.prefix[:cmpSize]
   116  }
   117  
// levelReader exposes the serialized contents of one index level as a byte
// stream. It reads the chunk referenced by idx and, when that chunk has been
// consumed, pulls the next index entry from the parent level and continues
// with the chunk that entry references.
type levelReader struct {
	// ctx is passed to the chunk readers created for this level.
	ctx context.Context
	// parent supplies the next index entry of the level above.
	parent pbutil.Reader
	// chunks is the chunk storage that chunk readers are created from.
	chunks *chunk.Storage
	// idx is the parent-level index entry whose chunk is currently being read;
	// it is reset and overwritten each time the level advances.
	idx *Index
	// buf holds the unread bytes of the current chunk; nil until the first read.
	buf *bytes.Buffer
}
   125  
   126  func newLevelReader(ctx context.Context, parent pbutil.Reader, chunks *chunk.Storage, idx *Index) *levelReader {
   127  	return &levelReader{
   128  		ctx:    ctx,
   129  		parent: parent,
   130  		chunks: chunks,
   131  		idx:    idx,
   132  	}
   133  }
   134  
   135  // Read reads data from an index level.
   136  func (lr *levelReader) Read(data []byte) (int, error) {
   137  	if err := lr.setup(); err != nil {
   138  		return 0, err
   139  	}
   140  	var bytesRead int
   141  	for len(data) > 0 {
   142  		if lr.buf.Len() == 0 {
   143  			if err := lr.next(); err != nil {
   144  				return bytesRead, err
   145  			}
   146  		}
   147  		n, _ := lr.buf.Read(data)
   148  		bytesRead += n
   149  		data = data[n:]
   150  	}
   151  	return bytesRead, nil
   152  }
   153  
   154  func (lr *levelReader) setup() error {
   155  	if lr.buf == nil {
   156  		r := lr.chunks.NewReader(lr.ctx, []*chunk.DataRef{lr.idx.Range.ChunkRef})
   157  		lr.buf = &bytes.Buffer{}
   158  		if err := r.Get(lr.buf); err != nil {
   159  			return err
   160  		}
   161  		// Skip offset bytes to get to first index entry in chunk.
   162  		lr.buf = bytes.NewBuffer(lr.buf.Bytes()[lr.idx.Range.Offset:])
   163  	}
   164  	return nil
   165  }
   166  
   167  func (lr *levelReader) next() error {
   168  	lr.idx.Reset()
   169  	if err := lr.parent.Read(lr.idx); err != nil {
   170  		return err
   171  	}
   172  	r := lr.chunks.NewReader(lr.ctx, []*chunk.DataRef{lr.idx.Range.ChunkRef})
   173  	lr.buf.Reset()
   174  	return r.Get(lr.buf)
   175  }