github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/storage/badger/operation/common.go (about) 1 package operation 2 3 import ( 4 "bytes" 5 "errors" 6 "fmt" 7 8 "github.com/dgraph-io/badger/v2" 9 "github.com/vmihailenco/msgpack/v4" 10 11 "github.com/onflow/flow-go/model/flow" 12 "github.com/onflow/flow-go/module/irrecoverable" 13 "github.com/onflow/flow-go/storage" 14 ) 15 16 // batchWrite will encode the given entity using msgpack and will upsert the resulting 17 // binary data in the badger wrote batch under the provided key - if the value already exists 18 // in the database it will be overridden. 19 // No errors are expected during normal operation. 20 func batchWrite(key []byte, entity interface{}) func(writeBatch *badger.WriteBatch) error { 21 return func(writeBatch *badger.WriteBatch) error { 22 23 // update the maximum key size if the inserted key is bigger 24 if uint32(len(key)) > max { 25 max = uint32(len(key)) 26 err := SetMax(writeBatch) 27 if err != nil { 28 return fmt.Errorf("could not update max tracker: %w", err) 29 } 30 } 31 32 // serialize the entity data 33 val, err := msgpack.Marshal(entity) 34 if err != nil { 35 return irrecoverable.NewExceptionf("could not encode entity: %w", err) 36 } 37 38 // persist the entity data into the DB 39 err = writeBatch.Set(key, val) 40 if err != nil { 41 return irrecoverable.NewExceptionf("could not store data: %w", err) 42 } 43 return nil 44 } 45 } 46 47 // insert will encode the given entity using msgpack and will insert the resulting 48 // binary data in the badger DB under the provided key. It will error if the 49 // key already exists. 50 // Error returns: 51 // - storage.ErrAlreadyExists if the key already exists in the database. 52 // - generic error in case of unexpected failure from the database layer or 53 // encoding failure. 54 func insert(key []byte, entity interface{}) func(*badger.Txn) error { 55 return func(tx *badger.Txn) error { 56 57 // update the maximum key size if the inserted key is bigger 58 if uint32(len(key)) > max { 59 max = uint32(len(key)) 60 err := SetMax(tx) 61 if err != nil { 62 return fmt.Errorf("could not update max tracker: %w", err) 63 } 64 } 65 66 // check if the key already exists in the db 67 _, err := tx.Get(key) 68 if err == nil { 69 return storage.ErrAlreadyExists 70 } 71 72 if !errors.Is(err, badger.ErrKeyNotFound) { 73 return irrecoverable.NewExceptionf("could not retrieve key: %w", err) 74 } 75 76 // serialize the entity data 77 val, err := msgpack.Marshal(entity) 78 if err != nil { 79 return irrecoverable.NewExceptionf("could not encode entity: %w", err) 80 } 81 82 // persist the entity data into the DB 83 err = tx.Set(key, val) 84 if err != nil { 85 return irrecoverable.NewExceptionf("could not store data: %w", err) 86 } 87 return nil 88 } 89 } 90 91 // update will encode the given entity with MsgPack and update the binary data 92 // under the given key in the badger DB. The key must already exist. 93 // Error returns: 94 // - storage.ErrNotFound if the key does not already exist in the database. 95 // - generic error in case of unexpected failure from the database layer or 96 // encoding failure. 97 func update(key []byte, entity interface{}) func(*badger.Txn) error { 98 return func(tx *badger.Txn) error { 99 100 // retrieve the item from the key-value store 101 _, err := tx.Get(key) 102 if errors.Is(err, badger.ErrKeyNotFound) { 103 return storage.ErrNotFound 104 } 105 if err != nil { 106 return irrecoverable.NewExceptionf("could not check key: %w", err) 107 } 108 109 // serialize the entity data 110 val, err := msgpack.Marshal(entity) 111 if err != nil { 112 return irrecoverable.NewExceptionf("could not encode entity: %w", err) 113 } 114 115 // persist the entity data into the DB 116 err = tx.Set(key, val) 117 if err != nil { 118 return irrecoverable.NewExceptionf("could not replace data: %w", err) 119 } 120 121 return nil 122 } 123 } 124 125 // upsert will encode the given entity with MsgPack and upsert the binary data 126 // under the given key in the badger DB. 127 func upsert(key []byte, entity interface{}) func(*badger.Txn) error { 128 return func(tx *badger.Txn) error { 129 // update the maximum key size if the inserted key is bigger 130 if uint32(len(key)) > max { 131 max = uint32(len(key)) 132 err := SetMax(tx) 133 if err != nil { 134 return fmt.Errorf("could not update max tracker: %w", err) 135 } 136 } 137 138 // serialize the entity data 139 val, err := msgpack.Marshal(entity) 140 if err != nil { 141 return irrecoverable.NewExceptionf("could not encode entity: %w", err) 142 } 143 144 // persist the entity data into the DB 145 err = tx.Set(key, val) 146 if err != nil { 147 return irrecoverable.NewExceptionf("could not upsert data: %w", err) 148 } 149 150 return nil 151 } 152 } 153 154 // remove removes the entity with the given key, if it exists. If it doesn't 155 // exist, this is a no-op. 156 // Error returns: 157 // * storage.ErrNotFound if the key to delete does not exist. 158 // * generic error in case of unexpected database error 159 func remove(key []byte) func(*badger.Txn) error { 160 return func(tx *badger.Txn) error { 161 // retrieve the item from the key-value store 162 _, err := tx.Get(key) 163 if err != nil { 164 if errors.Is(err, badger.ErrKeyNotFound) { 165 return storage.ErrNotFound 166 } 167 return irrecoverable.NewExceptionf("could not check key: %w", err) 168 } 169 170 err = tx.Delete(key) 171 if err != nil { 172 return irrecoverable.NewExceptionf("could not delete item: %w", err) 173 } 174 return nil 175 } 176 } 177 178 // batchRemove removes entry under a given key in a write-batch. 179 // if key doesn't exist, does nothing. 180 // No errors are expected during normal operation. 181 func batchRemove(key []byte) func(writeBatch *badger.WriteBatch) error { 182 return func(writeBatch *badger.WriteBatch) error { 183 err := writeBatch.Delete(key) 184 if err != nil { 185 return irrecoverable.NewExceptionf("could not batch delete data: %w", err) 186 } 187 return nil 188 } 189 } 190 191 // removeByPrefix removes all the entities if the prefix of the key matches the given prefix. 192 // if no key matches, this is a no-op 193 // No errors are expected during normal operation. 194 func removeByPrefix(prefix []byte) func(*badger.Txn) error { 195 return func(tx *badger.Txn) error { 196 opts := badger.DefaultIteratorOptions 197 opts.AllVersions = false 198 opts.PrefetchValues = false 199 it := tx.NewIterator(opts) 200 defer it.Close() 201 202 for it.Seek(prefix); it.ValidForPrefix(prefix); it.Next() { 203 key := it.Item().KeyCopy(nil) 204 err := tx.Delete(key) 205 if err != nil { 206 return irrecoverable.NewExceptionf("could not delete item with prefix: %w", err) 207 } 208 } 209 210 return nil 211 } 212 } 213 214 // batchRemoveByPrefix removes all items under the keys match the given prefix in a batch write transaction. 215 // no error would be returned if no key was found with the given prefix. 216 // all error returned should be exception 217 func batchRemoveByPrefix(prefix []byte) func(tx *badger.Txn, writeBatch *badger.WriteBatch) error { 218 return func(tx *badger.Txn, writeBatch *badger.WriteBatch) error { 219 220 opts := badger.DefaultIteratorOptions 221 opts.AllVersions = false 222 opts.PrefetchValues = false 223 it := tx.NewIterator(opts) 224 defer it.Close() 225 226 for it.Seek(prefix); it.ValidForPrefix(prefix); it.Next() { 227 key := it.Item().KeyCopy(nil) 228 err := writeBatch.Delete(key) 229 if err != nil { 230 return irrecoverable.NewExceptionf("could not delete item in batch: %w", err) 231 } 232 } 233 return nil 234 } 235 } 236 237 // retrieve will retrieve the binary data under the given key from the badger DB 238 // and decode it into the given entity. The provided entity needs to be a 239 // pointer to an initialized entity of the correct type. 240 // Error returns: 241 // - storage.ErrNotFound if the key does not exist in the database 242 // - generic error in case of unexpected failure from the database layer, or failure 243 // to decode an existing database value 244 func retrieve(key []byte, entity interface{}) func(*badger.Txn) error { 245 return func(tx *badger.Txn) error { 246 247 // retrieve the item from the key-value store 248 item, err := tx.Get(key) 249 if errors.Is(err, badger.ErrKeyNotFound) { 250 return storage.ErrNotFound 251 } 252 if err != nil { 253 return irrecoverable.NewExceptionf("could not load data: %w", err) 254 } 255 256 // get the value from the item 257 err = item.Value(func(val []byte) error { 258 err := msgpack.Unmarshal(val, entity) 259 return err 260 }) 261 if err != nil { 262 return irrecoverable.NewExceptionf("could not decode entity: %w", err) 263 } 264 265 return nil 266 } 267 } 268 269 // exists returns true if a key exists in the database. 270 // No errors are expected during normal operation. 271 func exists(key []byte, keyExists *bool) func(*badger.Txn) error { 272 return func(tx *badger.Txn) error { 273 _, err := tx.Get(key) 274 if err != nil { 275 // the key does not exist in the database 276 if errors.Is(err, badger.ErrKeyNotFound) { 277 *keyExists = false 278 return nil 279 } 280 // exception while checking for the key 281 return irrecoverable.NewExceptionf("could not load data: %w", err) 282 } 283 284 // the key does exist in the database 285 *keyExists = true 286 return nil 287 } 288 } 289 290 // checkFunc is called during key iteration through the badger DB in order to 291 // check whether we should process the given key-value pair. It can be used to 292 // avoid loading the value if its not of interest, as well as storing the key 293 // for the current iteration step. 294 type checkFunc func(key []byte) bool 295 296 // createFunc returns a pointer to an initialized entity that we can potentially 297 // decode the next value into during a badger DB iteration. 298 type createFunc func() interface{} 299 300 // handleFunc is a function that starts the processing of the current key-value 301 // pair during a badger iteration. It should be called after the key was checked 302 // and the entity was decoded. 303 // No errors are expected during normal operation. Any errors will halt the iteration. 304 type handleFunc func() error 305 306 // iterationFunc is a function provided to our low-level iteration function that 307 // allows us to pass badger efficiencies across badger boundaries. By calling it 308 // for each iteration step, we can inject a function to check the key, a 309 // function to create the decode target and a function to process the current 310 // key-value pair. This a consumer of the API to decode when to skip the loading 311 // of values, the initialization of entities and the processing. 312 type iterationFunc func() (checkFunc, createFunc, handleFunc) 313 314 // lookup is the default iteration function allowing us to collect a list of 315 // entity IDs from an index. 316 func lookup(entityIDs *[]flow.Identifier) func() (checkFunc, createFunc, handleFunc) { 317 *entityIDs = make([]flow.Identifier, 0, len(*entityIDs)) 318 return func() (checkFunc, createFunc, handleFunc) { 319 check := func(key []byte) bool { 320 return true 321 } 322 var entityID flow.Identifier 323 create := func() interface{} { 324 return &entityID 325 } 326 handle := func() error { 327 *entityIDs = append(*entityIDs, entityID) 328 return nil 329 } 330 return check, create, handle 331 } 332 } 333 334 // withPrefetchValuesFalse configures a Badger iteration to NOT preemptively load 335 // the values when iterating over keys (ie. key-only iteration). Key-only iteration 336 // is several order of magnitudes faster than regular iteration, because it involves 337 // access to the LSM-tree only, which is usually resident entirely in RAM. 338 func withPrefetchValuesFalse(options *badger.IteratorOptions) { 339 options.PrefetchValues = false 340 } 341 342 // iterate iterates over a range of keys defined by a start and end key. The 343 // start key may be higher than the end key, in which case we iterate in 344 // reverse order. 345 // 346 // The iteration range uses prefix-wise semantics. Specifically, all keys that 347 // meet ANY of the following conditions are included in the iteration: 348 // - have a prefix equal to the start key OR 349 // - have a prefix equal to the end key OR 350 // - have a prefix that is lexicographically between start and end 351 // 352 // On each iteration, it will call the iteration function to initialize 353 // functions specific to processing the given key-value pair. 354 // 355 // TODO: this function is unbounded – pass context.Context to this or calling functions to allow timing functions out. 356 // No errors are expected during normal operation. Any errors returned by the 357 // provided handleFunc will be propagated back to the caller of iterate. 358 func iterate(start []byte, end []byte, iteration iterationFunc, opts ...func(*badger.IteratorOptions)) func(*badger.Txn) error { 359 return func(tx *badger.Txn) error { 360 361 // initialize the default options and comparison modifier for iteration 362 modifier := 1 363 options := badger.DefaultIteratorOptions 364 for _, apply := range opts { 365 apply(&options) 366 } 367 368 // In order to satisfy this function's prefix-wise inclusion semantics, 369 // we append 0xff bytes to the largest of start and end. 370 // This ensures Badger will seek to the largest key with that prefix 371 // for reverse iteration, thus including all keys with a prefix matching 372 // the starting key. It also enables us to detect boundary conditions by 373 // simple lexicographic comparison (ie. bytes.Compare) rather than 374 // explicitly comparing prefixes. 375 // 376 // See https://github.com/onflow/flow-go/pull/3310#issuecomment-618127494 377 // for discussion and more detail on this. 378 379 // If start is bigger than end, we have a backwards iteration: 380 // 1) We set the reverse option on the iterator, so we step through all 381 // the keys backwards. This modifies the behaviour of Seek to go to 382 // the first key that is less than or equal to the start key (as 383 // opposed to greater than or equal in a regular iteration). 384 // 2) In order to satisfy this function's prefix-wise inclusion semantics, 385 // we append a 0xff-byte suffix to the start key so the seek will go 386 // to the right place. 387 // 3) For a regular iteration, we break the loop upon hitting the first 388 // item that has a key higher than the end prefix. In order to reverse 389 // this, we use a modifier for the comparison that reverses the check 390 // and makes it stop upon the first item lower than the end prefix. 391 if bytes.Compare(start, end) > 0 { 392 options.Reverse = true // make sure to go in reverse order 393 modifier = -1 // make sure to stop after end prefix 394 length := uint32(len(start)) 395 diff := max - length 396 for i := uint32(0); i < diff; i++ { 397 start = append(start, 0xff) 398 } 399 } else { 400 // for forward iteration, add the 0xff-bytes suffix to the end 401 // prefix, to ensure we include all keys with that prefix before 402 // finishing. 403 length := uint32(len(end)) 404 diff := max - length 405 for i := uint32(0); i < diff; i++ { 406 end = append(end, 0xff) 407 } 408 } 409 410 it := tx.NewIterator(options) 411 defer it.Close() 412 413 for it.Seek(start); it.Valid(); it.Next() { 414 415 item := it.Item() 416 417 key := item.Key() 418 // for forward iteration, check whether key > end, for backward 419 // iteration check whether key < end 420 if bytes.Compare(key, end)*modifier > 0 { 421 break 422 } 423 424 // initialize processing functions for iteration 425 check, create, handle := iteration() 426 427 // check if we should process the item at all 428 ok := check(key) 429 if !ok { 430 continue 431 } 432 433 // process the actual item 434 err := item.Value(func(val []byte) error { 435 436 // decode into the entity 437 entity := create() 438 err := msgpack.Unmarshal(val, entity) 439 if err != nil { 440 return irrecoverable.NewExceptionf("could not decode entity: %w", err) 441 } 442 443 // process the entity 444 err = handle() 445 if err != nil { 446 return fmt.Errorf("could not handle entity: %w", err) 447 } 448 449 return nil 450 }) 451 if err != nil { 452 return fmt.Errorf("could not process value: %w", err) 453 } 454 } 455 456 return nil 457 } 458 } 459 460 // traverse iterates over a range of keys defined by a prefix. 461 // 462 // The prefix must be shared by all keys in the iteration. 463 // 464 // On each iteration, it will call the iteration function to initialize 465 // functions specific to processing the given key-value pair. 466 func traverse(prefix []byte, iteration iterationFunc) func(*badger.Txn) error { 467 return func(tx *badger.Txn) error { 468 if len(prefix) == 0 { 469 return fmt.Errorf("prefix must not be empty") 470 } 471 472 opts := badger.DefaultIteratorOptions 473 // NOTE: this is an optimization only, it does not enforce that all 474 // results in the iteration have this prefix. 475 opts.Prefix = prefix 476 477 it := tx.NewIterator(opts) 478 defer it.Close() 479 480 // this is where we actually enforce that all results have the prefix 481 for it.Seek(prefix); it.ValidForPrefix(prefix); it.Next() { 482 483 item := it.Item() 484 485 // initialize processing functions for iteration 486 check, create, handle := iteration() 487 488 // check if we should process the item at all 489 key := item.Key() 490 ok := check(key) 491 if !ok { 492 continue 493 } 494 495 // process the actual item 496 err := item.Value(func(val []byte) error { 497 498 // decode into the entity 499 entity := create() 500 err := msgpack.Unmarshal(val, entity) 501 if err != nil { 502 return irrecoverable.NewExceptionf("could not decode entity: %w", err) 503 } 504 505 // process the entity 506 err = handle() 507 if err != nil { 508 return fmt.Errorf("could not handle entity: %w", err) 509 } 510 511 return nil 512 }) 513 if err != nil { 514 return fmt.Errorf("could not process value: %w", err) 515 } 516 } 517 518 return nil 519 } 520 } 521 522 // findHighestAtOrBelow searches for the highest key with the given prefix and a height 523 // at or below the target height, and retrieves and decodes the value associated with the 524 // key into the given entity. 525 // If no key is found, the function returns storage.ErrNotFound. 526 func findHighestAtOrBelow( 527 prefix []byte, 528 height uint64, 529 entity interface{}, 530 ) func(*badger.Txn) error { 531 return func(tx *badger.Txn) error { 532 if len(prefix) == 0 { 533 return fmt.Errorf("prefix must not be empty") 534 } 535 536 opts := badger.DefaultIteratorOptions 537 opts.Prefix = prefix 538 opts.Reverse = true 539 540 it := tx.NewIterator(opts) 541 defer it.Close() 542 543 it.Seek(append(prefix, b(height)...)) 544 545 if !it.Valid() { 546 return storage.ErrNotFound 547 } 548 549 return it.Item().Value(func(val []byte) error { 550 err := msgpack.Unmarshal(val, entity) 551 if err != nil { 552 return fmt.Errorf("could not decode entity: %w", err) 553 } 554 return nil 555 }) 556 } 557 } 558 559 // Fail returns a DB operation function that always fails with the given error. 560 func Fail(err error) func(*badger.Txn) error { 561 return func(_ *badger.Txn) error { 562 return err 563 } 564 }