github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/kv/iterators.go (about) 1 package kv 2 3 import ( 4 "bytes" 5 "context" 6 "errors" 7 "fmt" 8 9 "google.golang.org/protobuf/proto" 10 "google.golang.org/protobuf/reflect/protoreflect" 11 ) 12 13 type MessageEntry struct { 14 Key []byte 15 Value protoreflect.ProtoMessage 16 } 17 18 type MessageIterator interface { 19 Next() bool 20 Entry() *MessageEntry 21 Err() error 22 Close() 23 } 24 25 // PrimaryIterator MessageIterator implementation for primary key 26 // The iterator iterates over the given prefix and returns the proto message and key 27 type PrimaryIterator struct { 28 itr EntriesIterator 29 msgType protoreflect.MessageType 30 value *MessageEntry 31 err error 32 } 33 34 // IteratorOptions are the starting point options for PrimaryIterator 35 type IteratorOptions interface { 36 // Start returns the starting point of the iterator 37 Start() []byte 38 39 // IncludeStart determines whether to include Start() value in the iterator 40 IncludeStart() bool 41 } 42 43 // simple inner implementation of IteratorOptions 44 type options struct { 45 start []byte 46 includeStart bool 47 } 48 49 func (o *options) Start() []byte { 50 return o.start 51 } 52 53 func (o *options) IncludeStart() bool { 54 return o.includeStart 55 } 56 57 // IteratorOptionsFrom - returns iterator options from that includes the start key, if exists. 58 func IteratorOptionsFrom(start []byte) IteratorOptions { 59 return &options{start: start, includeStart: true} 60 } 61 62 // IteratorOptionsAfter - returns iterator options from that exclude the start key. 63 func IteratorOptionsAfter(start []byte) IteratorOptions { 64 return &options{start: start, includeStart: false} 65 } 66 67 // NewPrimaryIterator creates a new PrimaryIterator by scanning the store for the given prefix under the partitionKey. 68 // See IteratorOptions for the starting point options. 69 func NewPrimaryIterator(ctx context.Context, store Store, msgType protoreflect.MessageType, partitionKey string, prefix []byte, options IteratorOptions) (*PrimaryIterator, error) { 70 itr, err := ScanPrefix(ctx, store, []byte(partitionKey), prefix, options.Start()) 71 if err != nil { 72 return nil, fmt.Errorf("create prefix iterator: %w", err) 73 } 74 if !options.IncludeStart() { 75 return &PrimaryIterator{itr: NewSkipIterator(itr, options.Start()), msgType: msgType}, nil 76 } 77 return &PrimaryIterator{itr: itr, msgType: msgType}, nil 78 } 79 80 func (i *PrimaryIterator) Next() bool { 81 if i.Err() != nil { 82 return false 83 } 84 i.value = nil 85 if !i.itr.Next() { 86 return false 87 } 88 entry := i.itr.Entry() 89 if entry == nil { 90 i.err = ErrNotFound 91 return false 92 } 93 value := i.msgType.New().Interface() 94 err := proto.Unmarshal(entry.Value, value) 95 if err != nil { 96 i.err = fmt.Errorf("unmarshal proto data for key %s: %w", entry.Key, err) 97 return false 98 } 99 i.value = &MessageEntry{ 100 Key: entry.Key, 101 Value: value, 102 } 103 return true 104 } 105 106 func (i *PrimaryIterator) Entry() *MessageEntry { 107 return i.value 108 } 109 110 func (i *PrimaryIterator) Err() error { 111 if i.err != nil { 112 return i.err 113 } 114 return i.itr.Err() 115 } 116 117 func (i *PrimaryIterator) Close() { 118 i.itr.Close() 119 } 120 121 // SecondaryIterator MessageIterator implementation for secondary key 122 // The iterator iterates over the given prefix, extracts the primary key value from secondary key and then returns 123 // the proto message and primary key 124 type SecondaryIterator struct { 125 ctx context.Context 126 itr PrimaryIterator 127 partitionKey string 128 store Store 129 msgType protoreflect.MessageType 130 value *MessageEntry 131 err error 132 } 133 134 func NewSecondaryIterator(ctx context.Context, store Store, msgType protoreflect.MessageType, partitionKey string, prefix, after []byte) (*SecondaryIterator, error) { 135 itr, err := NewPrimaryIterator(ctx, store, (&SecondaryIndex{}).ProtoReflect().Type(), partitionKey, prefix, IteratorOptionsAfter(after)) 136 if err != nil { 137 return nil, fmt.Errorf("create prefix iterator: %w", err) 138 } 139 return &SecondaryIterator{ctx: ctx, itr: *itr, partitionKey: partitionKey, store: store, msgType: msgType}, nil 140 } 141 142 func (s *SecondaryIterator) Next() bool { 143 if s.Err() != nil { 144 return false 145 } 146 if !s.itr.Next() { 147 return false 148 } 149 secondary := s.itr.Entry() 150 if secondary == nil { 151 s.err = ErrNotFound 152 return false 153 } 154 next := secondary.Value.(*SecondaryIndex) 155 156 var ( 157 primary *ValueWithPredicate 158 err error 159 ) 160 for { 161 primary, err = s.store.Get(s.ctx, []byte(s.partitionKey), next.PrimaryKey) 162 if !errors.Is(err, ErrNotFound) { 163 break 164 } 165 if !s.itr.Next() { 166 return false 167 } 168 secondary = s.itr.Entry() 169 if secondary == nil { 170 s.err = ErrNotFound 171 return false 172 } 173 next = secondary.Value.(*SecondaryIndex) 174 } 175 if err != nil { 176 s.err = fmt.Errorf("getting value from key (primary key %s): %w", next.PrimaryKey, err) 177 return false 178 } 179 value := s.msgType.New().Interface() 180 err = proto.Unmarshal(primary.Value, value) 181 if err != nil { 182 s.err = fmt.Errorf("unmarshal proto data for key %s: %w", next.PrimaryKey, err) 183 return false 184 } 185 s.value = &MessageEntry{ 186 Key: secondary.Key, 187 Value: value, 188 } 189 return true 190 } 191 192 func (s *SecondaryIterator) Entry() *MessageEntry { 193 return s.value 194 } 195 196 func (s *SecondaryIterator) Err() error { 197 if s.err != nil { 198 return s.err 199 } 200 return s.itr.Err() 201 } 202 203 func (s *SecondaryIterator) Close() { 204 s.itr.Close() 205 } 206 207 // SkipFirstIterator will keep the behaviour of the given EntriesIterator, 208 // except for skipping the first Entry if its Key is equal to 'after'. 209 type SkipFirstIterator struct { 210 it EntriesIterator 211 after []byte 212 nextCalled bool 213 } 214 215 func NewSkipIterator(it EntriesIterator, after []byte) EntriesIterator { 216 return &SkipFirstIterator{it: it, after: after} 217 } 218 219 func (si *SkipFirstIterator) Next() bool { 220 if !si.nextCalled { 221 si.nextCalled = true 222 if !si.it.Next() { 223 return false 224 } 225 if !bytes.Equal(si.it.Entry().Key, si.after) { 226 return true 227 } 228 } 229 return si.it.Next() 230 } 231 232 func (si *SkipFirstIterator) SeekGE(key []byte) { 233 si.it.SeekGE(key) 234 } 235 236 func (si *SkipFirstIterator) Entry() *Entry { 237 return si.it.Entry() 238 } 239 240 func (si *SkipFirstIterator) Err() error { 241 return si.it.Err() 242 } 243 244 func (si *SkipFirstIterator) Close() { 245 si.it.Close() 246 } 247 248 // PartitionIterator Used to scan through a whole partition 249 type PartitionIterator struct { 250 ctx context.Context 251 store Store 252 msgType protoreflect.MessageType 253 itr EntriesIterator 254 partitionKey string 255 value *MessageEntry 256 err error 257 batchSize int 258 } 259 260 func NewPartitionIterator(ctx context.Context, store Store, msgType protoreflect.MessageType, partitionKey string, batchSize int) *PartitionIterator { 261 return &PartitionIterator{ 262 ctx: ctx, 263 store: store, 264 msgType: msgType, 265 partitionKey: partitionKey, 266 batchSize: batchSize, 267 } 268 } 269 270 func (p *PartitionIterator) Next() bool { 271 if p.Err() != nil { 272 return false 273 } 274 p.value = nil 275 if p.itr == nil { 276 p.itr, p.err = p.store.Scan(p.ctx, []byte(p.partitionKey), ScanOptions{BatchSize: p.batchSize}) 277 if p.err != nil { 278 p.itr = nil 279 return false 280 } 281 } 282 if !p.itr.Next() { 283 return false 284 } 285 entry := p.itr.Entry() 286 if entry == nil { 287 p.err = ErrMissingValue 288 return false 289 } 290 value := p.msgType.New().Interface() 291 err := proto.Unmarshal(entry.Value, value) 292 if err != nil { 293 p.err = fmt.Errorf("unmarshal proto data for key %s: %w", entry.Key, err) 294 return false 295 } 296 p.value = &MessageEntry{ 297 Key: entry.Key, 298 Value: value, 299 } 300 return true 301 } 302 303 func (p *PartitionIterator) SeekGE(key []byte) { 304 if p.itr == nil { 305 p.itr, p.err = p.store.Scan(p.ctx, []byte(p.partitionKey), ScanOptions{BatchSize: p.batchSize, KeyStart: key}) 306 return 307 } 308 p.itr.SeekGE(key) 309 } 310 311 func (p *PartitionIterator) Entry() *MessageEntry { 312 return p.value 313 } 314 315 func (p *PartitionIterator) Err() error { 316 if p.err != nil { 317 return p.err 318 } 319 if p.itr != nil { 320 return p.itr.Err() 321 } 322 return nil 323 } 324 325 func (p *PartitionIterator) Close() { 326 // Check itr is set, can be null in case seek fails 327 if p.itr != nil { 328 p.itr.Close() 329 p.itr = nil 330 } 331 }