github.com/onflow/flow-go@v0.33.17/storage/badger/operation/common.go (about) 1 // (c) 2019 Dapper Labs - ALL RIGHTS RESERVED 2 3 package operation 4 5 import ( 6 "bytes" 7 "errors" 8 "fmt" 9 10 "github.com/dgraph-io/badger/v2" 11 "github.com/vmihailenco/msgpack/v4" 12 13 "github.com/onflow/flow-go/model/flow" 14 "github.com/onflow/flow-go/module/irrecoverable" 15 "github.com/onflow/flow-go/storage" 16 ) 17 18 // batchWrite will encode the given entity using msgpack and will upsert the resulting 19 // binary data in the badger wrote batch under the provided key - if the value already exists 20 // in the database it will be overridden. 21 // No errors are expected during normal operation. 22 func batchWrite(key []byte, entity interface{}) func(writeBatch *badger.WriteBatch) error { 23 return func(writeBatch *badger.WriteBatch) error { 24 25 // update the maximum key size if the inserted key is bigger 26 if uint32(len(key)) > max { 27 max = uint32(len(key)) 28 err := SetMax(writeBatch) 29 if err != nil { 30 return fmt.Errorf("could not update max tracker: %w", err) 31 } 32 } 33 34 // serialize the entity data 35 val, err := msgpack.Marshal(entity) 36 if err != nil { 37 return irrecoverable.NewExceptionf("could not encode entity: %w", err) 38 } 39 40 // persist the entity data into the DB 41 err = writeBatch.Set(key, val) 42 if err != nil { 43 return irrecoverable.NewExceptionf("could not store data: %w", err) 44 } 45 return nil 46 } 47 } 48 49 // insert will encode the given entity using msgpack and will insert the resulting 50 // binary data in the badger DB under the provided key. It will error if the 51 // key already exists. 52 // Error returns: 53 // - storage.ErrAlreadyExists if the key already exists in the database. 54 // - generic error in case of unexpected failure from the database layer or 55 // encoding failure. 56 func insert(key []byte, entity interface{}) func(*badger.Txn) error { 57 return func(tx *badger.Txn) error { 58 59 // update the maximum key size if the inserted key is bigger 60 if uint32(len(key)) > max { 61 max = uint32(len(key)) 62 err := SetMax(tx) 63 if err != nil { 64 return fmt.Errorf("could not update max tracker: %w", err) 65 } 66 } 67 68 // check if the key already exists in the db 69 _, err := tx.Get(key) 70 if err == nil { 71 return storage.ErrAlreadyExists 72 } 73 74 if !errors.Is(err, badger.ErrKeyNotFound) { 75 return irrecoverable.NewExceptionf("could not retrieve key: %w", err) 76 } 77 78 // serialize the entity data 79 val, err := msgpack.Marshal(entity) 80 if err != nil { 81 return irrecoverable.NewExceptionf("could not encode entity: %w", err) 82 } 83 84 // persist the entity data into the DB 85 err = tx.Set(key, val) 86 if err != nil { 87 return irrecoverable.NewExceptionf("could not store data: %w", err) 88 } 89 return nil 90 } 91 } 92 93 // update will encode the given entity with MsgPack and update the binary data 94 // under the given key in the badger DB. The key must already exist. 95 // Error returns: 96 // - storage.ErrNotFound if the key does not already exist in the database. 97 // - generic error in case of unexpected failure from the database layer or 98 // encoding failure. 99 func update(key []byte, entity interface{}) func(*badger.Txn) error { 100 return func(tx *badger.Txn) error { 101 102 // retrieve the item from the key-value store 103 _, err := tx.Get(key) 104 if errors.Is(err, badger.ErrKeyNotFound) { 105 return storage.ErrNotFound 106 } 107 if err != nil { 108 return irrecoverable.NewExceptionf("could not check key: %w", err) 109 } 110 111 // serialize the entity data 112 val, err := msgpack.Marshal(entity) 113 if err != nil { 114 return irrecoverable.NewExceptionf("could not encode entity: %w", err) 115 } 116 117 // persist the entity data into the DB 118 err = tx.Set(key, val) 119 if err != nil { 120 return irrecoverable.NewExceptionf("could not replace data: %w", err) 121 } 122 123 return nil 124 } 125 } 126 127 // upsert will encode the given entity with MsgPack and upsert the binary data 128 // under the given key in the badger DB. 129 func upsert(key []byte, entity interface{}) func(*badger.Txn) error { 130 return func(tx *badger.Txn) error { 131 // update the maximum key size if the inserted key is bigger 132 if uint32(len(key)) > max { 133 max = uint32(len(key)) 134 err := SetMax(tx) 135 if err != nil { 136 return fmt.Errorf("could not update max tracker: %w", err) 137 } 138 } 139 140 // serialize the entity data 141 val, err := msgpack.Marshal(entity) 142 if err != nil { 143 return irrecoverable.NewExceptionf("could not encode entity: %w", err) 144 } 145 146 // persist the entity data into the DB 147 err = tx.Set(key, val) 148 if err != nil { 149 return irrecoverable.NewExceptionf("could not upsert data: %w", err) 150 } 151 152 return nil 153 } 154 } 155 156 // remove removes the entity with the given key, if it exists. If it doesn't 157 // exist, this is a no-op. 158 // Error returns: 159 // * storage.ErrNotFound if the key to delete does not exist. 160 // * generic error in case of unexpected database error 161 func remove(key []byte) func(*badger.Txn) error { 162 return func(tx *badger.Txn) error { 163 // retrieve the item from the key-value store 164 _, err := tx.Get(key) 165 if err != nil { 166 if errors.Is(err, badger.ErrKeyNotFound) { 167 return storage.ErrNotFound 168 } 169 return irrecoverable.NewExceptionf("could not check key: %w", err) 170 } 171 172 err = tx.Delete(key) 173 if err != nil { 174 return irrecoverable.NewExceptionf("could not delete item: %w", err) 175 } 176 return nil 177 } 178 } 179 180 // batchRemove removes entry under a given key in a write-batch. 181 // if key doesn't exist, does nothing. 182 // No errors are expected during normal operation. 183 func batchRemove(key []byte) func(writeBatch *badger.WriteBatch) error { 184 return func(writeBatch *badger.WriteBatch) error { 185 err := writeBatch.Delete(key) 186 if err != nil { 187 return irrecoverable.NewExceptionf("could not batch delete data: %w", err) 188 } 189 return nil 190 } 191 } 192 193 // removeByPrefix removes all the entities if the prefix of the key matches the given prefix. 194 // if no key matches, this is a no-op 195 // No errors are expected during normal operation. 196 func removeByPrefix(prefix []byte) func(*badger.Txn) error { 197 return func(tx *badger.Txn) error { 198 opts := badger.DefaultIteratorOptions 199 opts.AllVersions = false 200 opts.PrefetchValues = false 201 it := tx.NewIterator(opts) 202 defer it.Close() 203 204 for it.Seek(prefix); it.ValidForPrefix(prefix); it.Next() { 205 key := it.Item().KeyCopy(nil) 206 err := tx.Delete(key) 207 if err != nil { 208 return irrecoverable.NewExceptionf("could not delete item with prefix: %w", err) 209 } 210 } 211 212 return nil 213 } 214 } 215 216 // batchRemoveByPrefix removes all items under the keys match the given prefix in a batch write transaction. 217 // no error would be returned if no key was found with the given prefix. 218 // all error returned should be exception 219 func batchRemoveByPrefix(prefix []byte) func(tx *badger.Txn, writeBatch *badger.WriteBatch) error { 220 return func(tx *badger.Txn, writeBatch *badger.WriteBatch) error { 221 222 opts := badger.DefaultIteratorOptions 223 opts.AllVersions = false 224 opts.PrefetchValues = false 225 it := tx.NewIterator(opts) 226 defer it.Close() 227 228 for it.Seek(prefix); it.ValidForPrefix(prefix); it.Next() { 229 key := it.Item().KeyCopy(nil) 230 err := writeBatch.Delete(key) 231 if err != nil { 232 return irrecoverable.NewExceptionf("could not delete item in batch: %w", err) 233 } 234 } 235 return nil 236 } 237 } 238 239 // retrieve will retrieve the binary data under the given key from the badger DB 240 // and decode it into the given entity. The provided entity needs to be a 241 // pointer to an initialized entity of the correct type. 242 // Error returns: 243 // - storage.ErrNotFound if the key does not exist in the database 244 // - generic error in case of unexpected failure from the database layer, or failure 245 // to decode an existing database value 246 func retrieve(key []byte, entity interface{}) func(*badger.Txn) error { 247 return func(tx *badger.Txn) error { 248 249 // retrieve the item from the key-value store 250 item, err := tx.Get(key) 251 if errors.Is(err, badger.ErrKeyNotFound) { 252 return storage.ErrNotFound 253 } 254 if err != nil { 255 return irrecoverable.NewExceptionf("could not load data: %w", err) 256 } 257 258 // get the value from the item 259 err = item.Value(func(val []byte) error { 260 err := msgpack.Unmarshal(val, entity) 261 return err 262 }) 263 if err != nil { 264 return irrecoverable.NewExceptionf("could not decode entity: %w", err) 265 } 266 267 return nil 268 } 269 } 270 271 // exists returns true if a key exists in the database. 272 // No errors are expected during normal operation. 273 func exists(key []byte, keyExists *bool) func(*badger.Txn) error { 274 return func(tx *badger.Txn) error { 275 _, err := tx.Get(key) 276 if err != nil { 277 // the key does not exist in the database 278 if errors.Is(err, badger.ErrKeyNotFound) { 279 *keyExists = false 280 return nil 281 } 282 // exception while checking for the key 283 return irrecoverable.NewExceptionf("could not load data: %w", err) 284 } 285 286 // the key does exist in the database 287 *keyExists = true 288 return nil 289 } 290 } 291 292 // checkFunc is called during key iteration through the badger DB in order to 293 // check whether we should process the given key-value pair. It can be used to 294 // avoid loading the value if its not of interest, as well as storing the key 295 // for the current iteration step. 296 type checkFunc func(key []byte) bool 297 298 // createFunc returns a pointer to an initialized entity that we can potentially 299 // decode the next value into during a badger DB iteration. 300 type createFunc func() interface{} 301 302 // handleFunc is a function that starts the processing of the current key-value 303 // pair during a badger iteration. It should be called after the key was checked 304 // and the entity was decoded. 305 // No errors are expected during normal operation. Any errors will halt the iteration. 306 type handleFunc func() error 307 308 // iterationFunc is a function provided to our low-level iteration function that 309 // allows us to pass badger efficiencies across badger boundaries. By calling it 310 // for each iteration step, we can inject a function to check the key, a 311 // function to create the decode target and a function to process the current 312 // key-value pair. This a consumer of the API to decode when to skip the loading 313 // of values, the initialization of entities and the processing. 314 type iterationFunc func() (checkFunc, createFunc, handleFunc) 315 316 // lookup is the default iteration function allowing us to collect a list of 317 // entity IDs from an index. 318 func lookup(entityIDs *[]flow.Identifier) func() (checkFunc, createFunc, handleFunc) { 319 *entityIDs = make([]flow.Identifier, 0, len(*entityIDs)) 320 return func() (checkFunc, createFunc, handleFunc) { 321 check := func(key []byte) bool { 322 return true 323 } 324 var entityID flow.Identifier 325 create := func() interface{} { 326 return &entityID 327 } 328 handle := func() error { 329 *entityIDs = append(*entityIDs, entityID) 330 return nil 331 } 332 return check, create, handle 333 } 334 } 335 336 // withPrefetchValuesFalse configures a Badger iteration to NOT preemptively load 337 // the values when iterating over keys (ie. key-only iteration). Key-only iteration 338 // is several order of magnitudes faster than regular iteration, because it involves 339 // access to the LSM-tree only, which is usually resident entirely in RAM. 340 func withPrefetchValuesFalse(options *badger.IteratorOptions) { 341 options.PrefetchValues = false 342 } 343 344 // iterate iterates over a range of keys defined by a start and end key. The 345 // start key may be higher than the end key, in which case we iterate in 346 // reverse order. 347 // 348 // The iteration range uses prefix-wise semantics. Specifically, all keys that 349 // meet ANY of the following conditions are included in the iteration: 350 // - have a prefix equal to the start key OR 351 // - have a prefix equal to the end key OR 352 // - have a prefix that is lexicographically between start and end 353 // 354 // On each iteration, it will call the iteration function to initialize 355 // functions specific to processing the given key-value pair. 356 // 357 // TODO: this function is unbounded – pass context.Context to this or calling functions to allow timing functions out. 358 // No errors are expected during normal operation. Any errors returned by the 359 // provided handleFunc will be propagated back to the caller of iterate. 360 func iterate(start []byte, end []byte, iteration iterationFunc, opts ...func(*badger.IteratorOptions)) func(*badger.Txn) error { 361 return func(tx *badger.Txn) error { 362 363 // initialize the default options and comparison modifier for iteration 364 modifier := 1 365 options := badger.DefaultIteratorOptions 366 for _, apply := range opts { 367 apply(&options) 368 } 369 370 // In order to satisfy this function's prefix-wise inclusion semantics, 371 // we append 0xff bytes to the largest of start and end. 372 // This ensures Badger will seek to the largest key with that prefix 373 // for reverse iteration, thus including all keys with a prefix matching 374 // the starting key. It also enables us to detect boundary conditions by 375 // simple lexicographic comparison (ie. bytes.Compare) rather than 376 // explicitly comparing prefixes. 377 // 378 // See https://github.com/onflow/flow-go/pull/3310#issuecomment-618127494 379 // for discussion and more detail on this. 380 381 // If start is bigger than end, we have a backwards iteration: 382 // 1) We set the reverse option on the iterator, so we step through all 383 // the keys backwards. This modifies the behaviour of Seek to go to 384 // the first key that is less than or equal to the start key (as 385 // opposed to greater than or equal in a regular iteration). 386 // 2) In order to satisfy this function's prefix-wise inclusion semantics, 387 // we append a 0xff-byte suffix to the start key so the seek will go 388 // to the right place. 389 // 3) For a regular iteration, we break the loop upon hitting the first 390 // item that has a key higher than the end prefix. In order to reverse 391 // this, we use a modifier for the comparison that reverses the check 392 // and makes it stop upon the first item lower than the end prefix. 393 if bytes.Compare(start, end) > 0 { 394 options.Reverse = true // make sure to go in reverse order 395 modifier = -1 // make sure to stop after end prefix 396 length := uint32(len(start)) 397 diff := max - length 398 for i := uint32(0); i < diff; i++ { 399 start = append(start, 0xff) 400 } 401 } else { 402 // for forward iteration, add the 0xff-bytes suffix to the end 403 // prefix, to ensure we include all keys with that prefix before 404 // finishing. 405 length := uint32(len(end)) 406 diff := max - length 407 for i := uint32(0); i < diff; i++ { 408 end = append(end, 0xff) 409 } 410 } 411 412 it := tx.NewIterator(options) 413 defer it.Close() 414 415 for it.Seek(start); it.Valid(); it.Next() { 416 417 item := it.Item() 418 419 key := item.Key() 420 // for forward iteration, check whether key > end, for backward 421 // iteration check whether key < end 422 if bytes.Compare(key, end)*modifier > 0 { 423 break 424 } 425 426 // initialize processing functions for iteration 427 check, create, handle := iteration() 428 429 // check if we should process the item at all 430 ok := check(key) 431 if !ok { 432 continue 433 } 434 435 // process the actual item 436 err := item.Value(func(val []byte) error { 437 438 // decode into the entity 439 entity := create() 440 err := msgpack.Unmarshal(val, entity) 441 if err != nil { 442 return irrecoverable.NewExceptionf("could not decode entity: %w", err) 443 } 444 445 // process the entity 446 err = handle() 447 if err != nil { 448 return fmt.Errorf("could not handle entity: %w", err) 449 } 450 451 return nil 452 }) 453 if err != nil { 454 return fmt.Errorf("could not process value: %w", err) 455 } 456 } 457 458 return nil 459 } 460 } 461 462 // traverse iterates over a range of keys defined by a prefix. 463 // 464 // The prefix must be shared by all keys in the iteration. 465 // 466 // On each iteration, it will call the iteration function to initialize 467 // functions specific to processing the given key-value pair. 468 func traverse(prefix []byte, iteration iterationFunc) func(*badger.Txn) error { 469 return func(tx *badger.Txn) error { 470 if len(prefix) == 0 { 471 return fmt.Errorf("prefix must not be empty") 472 } 473 474 opts := badger.DefaultIteratorOptions 475 // NOTE: this is an optimization only, it does not enforce that all 476 // results in the iteration have this prefix. 477 opts.Prefix = prefix 478 479 it := tx.NewIterator(opts) 480 defer it.Close() 481 482 // this is where we actually enforce that all results have the prefix 483 for it.Seek(prefix); it.ValidForPrefix(prefix); it.Next() { 484 485 item := it.Item() 486 487 // initialize processing functions for iteration 488 check, create, handle := iteration() 489 490 // check if we should process the item at all 491 key := item.Key() 492 ok := check(key) 493 if !ok { 494 continue 495 } 496 497 // process the actual item 498 err := item.Value(func(val []byte) error { 499 500 // decode into the entity 501 entity := create() 502 err := msgpack.Unmarshal(val, entity) 503 if err != nil { 504 return irrecoverable.NewExceptionf("could not decode entity: %w", err) 505 } 506 507 // process the entity 508 err = handle() 509 if err != nil { 510 return fmt.Errorf("could not handle entity: %w", err) 511 } 512 513 return nil 514 }) 515 if err != nil { 516 return fmt.Errorf("could not process value: %w", err) 517 } 518 } 519 520 return nil 521 } 522 } 523 524 // findHighestAtOrBelow searches for the highest key with the given prefix and a height 525 // at or below the target height, and retrieves and decodes the value associated with the 526 // key into the given entity. 527 // If no key is found, the function returns storage.ErrNotFound. 528 func findHighestAtOrBelow( 529 prefix []byte, 530 height uint64, 531 entity interface{}, 532 ) func(*badger.Txn) error { 533 return func(tx *badger.Txn) error { 534 if len(prefix) == 0 { 535 return fmt.Errorf("prefix must not be empty") 536 } 537 538 opts := badger.DefaultIteratorOptions 539 opts.Prefix = prefix 540 opts.Reverse = true 541 542 it := tx.NewIterator(opts) 543 defer it.Close() 544 545 it.Seek(append(prefix, b(height)...)) 546 547 if !it.Valid() { 548 return storage.ErrNotFound 549 } 550 551 return it.Item().Value(func(val []byte) error { 552 err := msgpack.Unmarshal(val, entity) 553 if err != nil { 554 return fmt.Errorf("could not decode entity: %w", err) 555 } 556 return nil 557 }) 558 } 559 } 560 561 // Fail returns a DB operation function that always fails with the given error. 562 func Fail(err error) func(*badger.Txn) error { 563 return func(_ *badger.Txn) error { 564 return err 565 } 566 }