github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/kv/iterators.go (about)

     1  package kv
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"errors"
     7  	"fmt"
     8  
     9  	"google.golang.org/protobuf/proto"
    10  	"google.golang.org/protobuf/reflect/protoreflect"
    11  )
    12  
// MessageEntry is a single item produced by a MessageIterator: a key together
// with the protobuf message decoded from the value stored for it.
type MessageEntry struct {
	Key   []byte
	Value protoreflect.ProtoMessage
}
    17  
// MessageIterator iterates over entries whose values are decoded as protobuf
// messages.
type MessageIterator interface {
	// Next advances the iterator and reports whether an entry is available.
	Next() bool
	// Entry returns the entry the iterator is currently positioned on.
	Entry() *MessageEntry
	// Err returns the error, if any, recorded during the iteration.
	Err() error
	// Close closes the iterator.
	Close()
}
    24  
// PrimaryIterator is the MessageIterator implementation for primary keys.
// It walks the entries of an underlying EntriesIterator and decodes every value
// into a new message of msgType, returning the message together with its key.
type PrimaryIterator struct {
	itr     EntriesIterator          // underlying iterator over the raw entries
	msgType protoreflect.MessageType // message type every value is unmarshaled into
	value   *MessageEntry            // current entry; nil until Next has succeeded
	err     error                    // error recorded by Next; once set, Next returns false
}
    33  
// IteratorOptions describes the starting point of a PrimaryIterator.
type IteratorOptions interface {
	// Start returns the key at which the iteration starts.
	Start() []byte

	// IncludeStart reports whether an entry whose key equals Start() is returned
	// by the iterator (true) or skipped (false).
	IncludeStart() bool
}
    42  
    43  // simple inner implementation of IteratorOptions
    44  type options struct {
    45  	start        []byte
    46  	includeStart bool
    47  }
    48  
    49  func (o *options) Start() []byte {
    50  	return o.start
    51  }
    52  
    53  func (o *options) IncludeStart() bool {
    54  	return o.includeStart
    55  }
    56  
    57  // IteratorOptionsFrom - returns iterator options from that includes the start key, if exists.
    58  func IteratorOptionsFrom(start []byte) IteratorOptions {
    59  	return &options{start: start, includeStart: true}
    60  }
    61  
    62  // IteratorOptionsAfter - returns iterator options from that exclude the start key.
    63  func IteratorOptionsAfter(start []byte) IteratorOptions {
    64  	return &options{start: start, includeStart: false}
    65  }
    66  
    67  // NewPrimaryIterator creates a new PrimaryIterator by scanning the store for the given prefix under the partitionKey.
    68  // See IteratorOptions for the starting point options.
    69  func NewPrimaryIterator(ctx context.Context, store Store, msgType protoreflect.MessageType, partitionKey string, prefix []byte, options IteratorOptions) (*PrimaryIterator, error) {
    70  	itr, err := ScanPrefix(ctx, store, []byte(partitionKey), prefix, options.Start())
    71  	if err != nil {
    72  		return nil, fmt.Errorf("create prefix iterator: %w", err)
    73  	}
    74  	if !options.IncludeStart() {
    75  		return &PrimaryIterator{itr: NewSkipIterator(itr, options.Start()), msgType: msgType}, nil
    76  	}
    77  	return &PrimaryIterator{itr: itr, msgType: msgType}, nil
    78  }
    79  
    80  func (i *PrimaryIterator) Next() bool {
    81  	if i.Err() != nil {
    82  		return false
    83  	}
    84  	i.value = nil
    85  	if !i.itr.Next() {
    86  		return false
    87  	}
    88  	entry := i.itr.Entry()
    89  	if entry == nil {
    90  		i.err = ErrNotFound
    91  		return false
    92  	}
    93  	value := i.msgType.New().Interface()
    94  	err := proto.Unmarshal(entry.Value, value)
    95  	if err != nil {
    96  		i.err = fmt.Errorf("unmarshal proto data for key %s: %w", entry.Key, err)
    97  		return false
    98  	}
    99  	i.value = &MessageEntry{
   100  		Key:   entry.Key,
   101  		Value: value,
   102  	}
   103  	return true
   104  }
   105  
// Entry returns the current entry as set by the last call to Next.
// It is nil until Next has succeeded.
func (i *PrimaryIterator) Entry() *MessageEntry {
	return i.value
}
   109  
   110  func (i *PrimaryIterator) Err() error {
   111  	if i.err != nil {
   112  		return i.err
   113  	}
   114  	return i.itr.Err()
   115  }
   116  
// Close closes the underlying entries iterator.
func (i *PrimaryIterator) Close() {
	i.itr.Close()
}
   120  
// SecondaryIterator is the MessageIterator implementation for secondary keys.
// It iterates over the secondary-index entries under the given prefix, takes the
// primary key stored in each of them, reads that primary key's value from the
// store and decodes it as msgType.
// The Key of a returned entry is the key of the secondary-index entry; the
// Value is the message decoded from the primary record.
type SecondaryIterator struct {
	ctx          context.Context          // context passed to store.Get
	itr          PrimaryIterator          // iterator over the secondary-index entries
	partitionKey string                   // partition the primary records are read from
	store        Store                    // store the primary records are read from
	msgType      protoreflect.MessageType // message type the primary record is decoded into
	value        *MessageEntry            // current entry; nil until Next has succeeded
	err          error                    // error recorded by Next; once set, Next returns false
}
   133  
   134  func NewSecondaryIterator(ctx context.Context, store Store, msgType protoreflect.MessageType, partitionKey string, prefix, after []byte) (*SecondaryIterator, error) {
   135  	itr, err := NewPrimaryIterator(ctx, store, (&SecondaryIndex{}).ProtoReflect().Type(), partitionKey, prefix, IteratorOptionsAfter(after))
   136  	if err != nil {
   137  		return nil, fmt.Errorf("create prefix iterator: %w", err)
   138  	}
   139  	return &SecondaryIterator{ctx: ctx, itr: *itr, partitionKey: partitionKey, store: store, msgType: msgType}, nil
   140  }
   141  
   142  func (s *SecondaryIterator) Next() bool {
   143  	if s.Err() != nil {
   144  		return false
   145  	}
   146  	if !s.itr.Next() {
   147  		return false
   148  	}
   149  	secondary := s.itr.Entry()
   150  	if secondary == nil {
   151  		s.err = ErrNotFound
   152  		return false
   153  	}
   154  	next := secondary.Value.(*SecondaryIndex)
   155  
   156  	var (
   157  		primary *ValueWithPredicate
   158  		err     error
   159  	)
   160  	for {
   161  		primary, err = s.store.Get(s.ctx, []byte(s.partitionKey), next.PrimaryKey)
   162  		if !errors.Is(err, ErrNotFound) {
   163  			break
   164  		}
   165  		if !s.itr.Next() {
   166  			return false
   167  		}
   168  		secondary = s.itr.Entry()
   169  		if secondary == nil {
   170  			s.err = ErrNotFound
   171  			return false
   172  		}
   173  		next = secondary.Value.(*SecondaryIndex)
   174  	}
   175  	if err != nil {
   176  		s.err = fmt.Errorf("getting value from key (primary key %s): %w", next.PrimaryKey, err)
   177  		return false
   178  	}
   179  	value := s.msgType.New().Interface()
   180  	err = proto.Unmarshal(primary.Value, value)
   181  	if err != nil {
   182  		s.err = fmt.Errorf("unmarshal proto data for key %s: %w", next.PrimaryKey, err)
   183  		return false
   184  	}
   185  	s.value = &MessageEntry{
   186  		Key:   secondary.Key,
   187  		Value: value,
   188  	}
   189  	return true
   190  }
   191  
// Entry returns the value most recently set by Next; it is nil before the first
// successful call to Next.
func (s *SecondaryIterator) Entry() *MessageEntry {
	return s.value
}
   195  
   196  func (s *SecondaryIterator) Err() error {
   197  	if s.err != nil {
   198  		return s.err
   199  	}
   200  	return s.itr.Err()
   201  }
   202  
// Close closes the iterator over the secondary-index entries.
func (s *SecondaryIterator) Close() {
	s.itr.Close()
}
   206  
// SkipFirstIterator keeps the behaviour of the wrapped EntriesIterator,
// except that it skips the first Entry if its Key is equal to 'after'.
type SkipFirstIterator struct {
	it         EntriesIterator // wrapped iterator
	after      []byte          // key to skip when it is the key of the first entry
	nextCalled bool            // set by the first call to Next; the skip check runs only on that call
}
   214  
// NewSkipIterator wraps it in a SkipFirstIterator that skips the first entry
// if its key equals after.
func NewSkipIterator(it EntriesIterator, after []byte) EntriesIterator {
	return &SkipFirstIterator{it: it, after: after}
}
   218  
   219  func (si *SkipFirstIterator) Next() bool {
   220  	if !si.nextCalled {
   221  		si.nextCalled = true
   222  		if !si.it.Next() {
   223  			return false
   224  		}
   225  		if !bytes.Equal(si.it.Entry().Key, si.after) {
   226  			return true
   227  		}
   228  	}
   229  	return si.it.Next()
   230  }
   231  
// SeekGE forwards the seek to the wrapped iterator.
// It does not touch nextCalled, so if Next has not been called yet, the first
// Next after the seek still applies the skip check against 'after'.
func (si *SkipFirstIterator) SeekGE(key []byte) {
	si.it.SeekGE(key)
}
   235  
// Entry returns the current entry of the wrapped iterator.
func (si *SkipFirstIterator) Entry() *Entry {
	return si.it.Entry()
}
   239  
// Err returns the error of the wrapped iterator.
func (si *SkipFirstIterator) Err() error {
	return si.it.Err()
}
   243  
// Close closes the wrapped iterator.
func (si *SkipFirstIterator) Close() {
	si.it.Close()
}
   247  
// PartitionIterator is used to scan through a whole partition, decoding every
// value as msgType. The underlying scan is not opened by the constructor; it is
// started by the first call to Next or SeekGE.
type PartitionIterator struct {
	ctx          context.Context          // context passed to store.Scan
	store        Store                    // store being scanned
	msgType      protoreflect.MessageType // message type every value is unmarshaled into
	itr          EntriesIterator          // underlying scan iterator; nil until a scan has been started
	partitionKey string                   // partition being scanned
	value        *MessageEntry            // current entry; nil until Next has succeeded
	err          error                    // error recorded by Next or SeekGE
	batchSize    int                      // passed to the scan as ScanOptions.BatchSize
}
   259  
   260  func NewPartitionIterator(ctx context.Context, store Store, msgType protoreflect.MessageType, partitionKey string, batchSize int) *PartitionIterator {
   261  	return &PartitionIterator{
   262  		ctx:          ctx,
   263  		store:        store,
   264  		msgType:      msgType,
   265  		partitionKey: partitionKey,
   266  		batchSize:    batchSize,
   267  	}
   268  }
   269  
   270  func (p *PartitionIterator) Next() bool {
   271  	if p.Err() != nil {
   272  		return false
   273  	}
   274  	p.value = nil
   275  	if p.itr == nil {
   276  		p.itr, p.err = p.store.Scan(p.ctx, []byte(p.partitionKey), ScanOptions{BatchSize: p.batchSize})
   277  		if p.err != nil {
   278  			p.itr = nil
   279  			return false
   280  		}
   281  	}
   282  	if !p.itr.Next() {
   283  		return false
   284  	}
   285  	entry := p.itr.Entry()
   286  	if entry == nil {
   287  		p.err = ErrMissingValue
   288  		return false
   289  	}
   290  	value := p.msgType.New().Interface()
   291  	err := proto.Unmarshal(entry.Value, value)
   292  	if err != nil {
   293  		p.err = fmt.Errorf("unmarshal proto data for key %s: %w", entry.Key, err)
   294  		return false
   295  	}
   296  	p.value = &MessageEntry{
   297  		Key:   entry.Key,
   298  		Value: value,
   299  	}
   300  	return true
   301  }
   302  
   303  func (p *PartitionIterator) SeekGE(key []byte) {
   304  	if p.itr == nil {
   305  		p.itr, p.err = p.store.Scan(p.ctx, []byte(p.partitionKey), ScanOptions{BatchSize: p.batchSize, KeyStart: key})
   306  		return
   307  	}
   308  	p.itr.SeekGE(key)
   309  }
   310  
// Entry returns the current entry as set by the last call to Next.
// It is nil until Next has succeeded.
func (p *PartitionIterator) Entry() *MessageEntry {
	return p.value
}
   314  
   315  func (p *PartitionIterator) Err() error {
   316  	if p.err != nil {
   317  		return p.err
   318  	}
   319  	if p.itr != nil {
   320  		return p.itr.Err()
   321  	}
   322  	return nil
   323  }
   324  
   325  func (p *PartitionIterator) Close() {
   326  	// Check itr is set, can be null in case seek fails
   327  	if p.itr != nil {
   328  		p.itr.Close()
   329  		p.itr = nil
   330  	}
   331  }