github.com/koko1123/flow-go-1@v0.29.6/storage/badger/operation/common.go (about) 1 // (c) 2019 Dapper Labs - ALL RIGHTS RESERVED 2 3 package operation 4 5 import ( 6 "bytes" 7 "errors" 8 "fmt" 9 10 "github.com/dgraph-io/badger/v3" 11 "github.com/vmihailenco/msgpack/v4" 12 13 "github.com/koko1123/flow-go-1/model/flow" 14 "github.com/koko1123/flow-go-1/storage" 15 ) 16 17 // batchWrite will encode the given entity using msgpack and will upsert the resulting 18 // binary data in the badger wrote batch under the provided key - if the value already exists 19 // in the database it will be overridden. 20 // No errors are expected during normal operation. 21 func batchWrite(key []byte, entity interface{}) func(writeBatch *badger.WriteBatch) error { 22 return func(writeBatch *badger.WriteBatch) error { 23 24 // update the maximum key size if the inserted key is bigger 25 if uint32(len(key)) > max { 26 max = uint32(len(key)) 27 err := SetMax(writeBatch) 28 if err != nil { 29 return fmt.Errorf("could not update max tracker: %w", err) 30 } 31 } 32 33 // serialize the entity data 34 val, err := msgpack.Marshal(entity) 35 if err != nil { 36 return fmt.Errorf("could not encode entity: %w", err) 37 } 38 39 // persist the entity data into the DB 40 err = writeBatch.Set(key, val) 41 if err != nil { 42 return fmt.Errorf("could not store data: %w", err) 43 } 44 return nil 45 } 46 } 47 48 // insert will encode the given entity using msgpack and will insert the resulting 49 // binary data in the badger DB under the provided key. It will error if the 50 // key already exists. 51 // Error returns: 52 // - storage.ErrAlreadyExists if the key already exists in the database. 53 // - generic error in case of unexpected failure from the database layer or 54 // encoding failure. 55 func insert(key []byte, entity interface{}) func(*badger.Txn) error { 56 return func(tx *badger.Txn) error { 57 58 // update the maximum key size if the inserted key is bigger 59 if uint32(len(key)) > max { 60 max = uint32(len(key)) 61 err := SetMax(tx) 62 if err != nil { 63 return fmt.Errorf("could not update max tracker: %w", err) 64 } 65 } 66 67 // check if the key already exists in the db 68 _, err := tx.Get(key) 69 if err == nil { 70 return storage.ErrAlreadyExists 71 } 72 73 if !errors.Is(err, badger.ErrKeyNotFound) { 74 return fmt.Errorf("could not retrieve key: %w", err) 75 } 76 77 // serialize the entity data 78 val, err := msgpack.Marshal(entity) 79 if err != nil { 80 return fmt.Errorf("could not encode entity: %w", err) 81 } 82 83 // persist the entity data into the DB 84 err = tx.Set(key, val) 85 if err != nil { 86 return fmt.Errorf("could not store data: %w", err) 87 } 88 return nil 89 } 90 } 91 92 // update will encode the given entity with MsgPack and update the binary data 93 // under the given key in the badger DB. The key must already exist. 94 // Error returns: 95 // - storage.ErrNotFound if the key does not already exist in the database. 96 // - generic error in case of unexpected failure from the database layer or 97 // encoding failure. 98 func update(key []byte, entity interface{}) func(*badger.Txn) error { 99 return func(tx *badger.Txn) error { 100 101 // retrieve the item from the key-value store 102 _, err := tx.Get(key) 103 if errors.Is(err, badger.ErrKeyNotFound) { 104 return storage.ErrNotFound 105 } 106 if err != nil { 107 return fmt.Errorf("could not check key: %w", err) 108 } 109 110 // serialize the entity data 111 val, err := msgpack.Marshal(entity) 112 if err != nil { 113 return fmt.Errorf("could not encode entity: %w", err) 114 } 115 116 // persist the entity data into the DB 117 err = tx.Set(key, val) 118 if err != nil { 119 return fmt.Errorf("could not replace data: %w", err) 120 } 121 122 return nil 123 } 124 } 125 126 // upsert will encode the given entity with MsgPack and upsert the binary data 127 // under the given key in the badger DB. 128 func upsert(key []byte, entity interface{}) func(*badger.Txn) error { 129 return func(tx *badger.Txn) error { 130 // update the maximum key size if the inserted key is bigger 131 if uint32(len(key)) > max { 132 max = uint32(len(key)) 133 err := SetMax(tx) 134 if err != nil { 135 return fmt.Errorf("could not update max tracker: %w", err) 136 } 137 } 138 139 // serialize the entity data 140 val, err := msgpack.Marshal(entity) 141 if err != nil { 142 return fmt.Errorf("could not encode entity: %w", err) 143 } 144 145 // persist the entity data into the DB 146 err = tx.Set(key, val) 147 if err != nil { 148 return fmt.Errorf("could not upsert data: %w", err) 149 } 150 151 return nil 152 } 153 } 154 155 // remove removes the entity with the given key, if it exists. If it doesn't 156 // exist, this is a no-op. 157 // Error returns: 158 // * storage.ErrNotFound if the key to delete does not exist. 159 // * generic error in case of unexpected database error 160 func remove(key []byte) func(*badger.Txn) error { 161 return func(tx *badger.Txn) error { 162 // retrieve the item from the key-value store 163 _, err := tx.Get(key) 164 if errors.Is(err, badger.ErrKeyNotFound) { 165 return storage.ErrNotFound 166 } 167 if err != nil { 168 return fmt.Errorf("could not check key: %w", err) 169 } 170 171 err = tx.Delete(key) 172 return err 173 } 174 } 175 176 // batchRemove removes entry under a given key in a write-batch. 177 // if key doesn't exist, does nothing. 178 // No errors are expected during normal operation. 179 func batchRemove(key []byte) func(writeBatch *badger.WriteBatch) error { 180 return func(writeBatch *badger.WriteBatch) error { 181 err := writeBatch.Delete(key) 182 if err != nil { 183 return fmt.Errorf("could not batch delete data: %w", err) 184 } 185 return nil 186 } 187 } 188 189 // removeByPrefix removes all the entities if the prefix of the key matches the given prefix. 190 // if no key matches, this is a no-op 191 // No errors are expected during normal operation. 192 func removeByPrefix(prefix []byte) func(*badger.Txn) error { 193 return func(tx *badger.Txn) error { 194 opts := badger.DefaultIteratorOptions 195 opts.AllVersions = false 196 opts.PrefetchValues = false 197 it := tx.NewIterator(opts) 198 defer it.Close() 199 200 for it.Seek(prefix); it.ValidForPrefix(prefix); it.Next() { 201 key := it.Item().KeyCopy(nil) 202 err := tx.Delete(key) 203 if err != nil { 204 return err 205 } 206 } 207 208 return nil 209 } 210 } 211 212 // batchRemoveByPrefix removes all items under the keys match the given prefix in a batch write transaction. 213 // no error would be returned if no key was found with the given prefix. 214 // all error returned should be exception 215 func batchRemoveByPrefix(prefix []byte) func(tx *badger.Txn, writeBatch *badger.WriteBatch) error { 216 return func(tx *badger.Txn, writeBatch *badger.WriteBatch) error { 217 218 opts := badger.DefaultIteratorOptions 219 opts.AllVersions = false 220 opts.PrefetchValues = false 221 it := tx.NewIterator(opts) 222 defer it.Close() 223 224 for it.Seek(prefix); it.ValidForPrefix(prefix); it.Next() { 225 key := it.Item().KeyCopy(nil) 226 err := writeBatch.Delete(key) 227 if err != nil { 228 return err 229 } 230 } 231 return nil 232 } 233 } 234 235 // retrieve will retrieve the binary data under the given key from the badger DB 236 // and decode it into the given entity. The provided entity needs to be a 237 // pointer to an initialized entity of the correct type. 238 // Error returns: 239 // - storage.ErrNotFound if the key does not exist in the database 240 // - generic error in case of unexpected failure from the database layer, or failure 241 // to decode an existing database value 242 func retrieve(key []byte, entity interface{}) func(*badger.Txn) error { 243 return func(tx *badger.Txn) error { 244 245 // retrieve the item from the key-value store 246 item, err := tx.Get(key) 247 if errors.Is(err, badger.ErrKeyNotFound) { 248 return storage.ErrNotFound 249 } 250 if err != nil { 251 return fmt.Errorf("could not load data: %w", err) 252 } 253 254 // get the value from the item 255 err = item.Value(func(val []byte) error { 256 err := msgpack.Unmarshal(val, entity) 257 return err 258 }) 259 if err != nil { 260 return fmt.Errorf("could not decode entity: %w", err) 261 } 262 263 return nil 264 } 265 } 266 267 // checkFunc is called during key iteration through the badger DB in order to 268 // check whether we should process the given key-value pair. It can be used to 269 // avoid loading the value if its not of interest, as well as storing the key 270 // for the current iteration step. 271 type checkFunc func(key []byte) bool 272 273 // createFunc returns a pointer to an initialized entity that we can potentially 274 // decode the next value into during a badger DB iteration. 275 type createFunc func() interface{} 276 277 // handleFunc is a function that starts the processing of the current key-value 278 // pair during a badger iteration. It should be called after the key was checked 279 // and the entity was decoded. 280 // No errors are expected during normal operation. Any errors will halt the iteration. 281 type handleFunc func() error 282 283 // iterationFunc is a function provided to our low-level iteration function that 284 // allows us to pass badger efficiencies across badger boundaries. By calling it 285 // for each iteration step, we can inject a function to check the key, a 286 // function to create the decode target and a function to process the current 287 // key-value pair. This a consumer of the API to decode when to skip the loading 288 // of values, the initialization of entities and the processing. 289 type iterationFunc func() (checkFunc, createFunc, handleFunc) 290 291 // lookup is the default iteration function allowing us to collect a list of 292 // entity IDs from an index. 293 func lookup(entityIDs *[]flow.Identifier) func() (checkFunc, createFunc, handleFunc) { 294 *entityIDs = make([]flow.Identifier, 0, len(*entityIDs)) 295 return func() (checkFunc, createFunc, handleFunc) { 296 check := func(key []byte) bool { 297 return true 298 } 299 var entityID flow.Identifier 300 create := func() interface{} { 301 return &entityID 302 } 303 handle := func() error { 304 *entityIDs = append(*entityIDs, entityID) 305 return nil 306 } 307 return check, create, handle 308 } 309 } 310 311 // withPrefetchValuesFalse configures a Badger iteration to NOT preemptively load 312 // the values when iterating over keys (ie. key-only iteration). Key-only iteration 313 // is several order of magnitudes faster than regular iteration, because it involves 314 // access to the LSM-tree only, which is usually resident entirely in RAM. 315 func withPrefetchValuesFalse(options *badger.IteratorOptions) { 316 options.PrefetchValues = false 317 } 318 319 // iterate iterates over a range of keys defined by a start and end key. The 320 // start key may be higher than the end key, in which case we iterate in 321 // reverse order. 322 // 323 // The iteration range uses prefix-wise semantics. Specifically, all keys that 324 // meet ANY of the following conditions are included in the iteration: 325 // - have a prefix equal to the start key OR 326 // - have a prefix equal to the end key OR 327 // - have a prefix that is lexicographically between start and end 328 // 329 // On each iteration, it will call the iteration function to initialize 330 // functions specific to processing the given key-value pair. 331 // 332 // TODO: this function is unbounded – pass context.Context to this or calling 333 // functions to allow timing functions out. 334 // No errors are expected during normal operation. Any errors returned by the 335 // provided handleFunc will be propagated back to the caller of iterate. 336 func iterate(start []byte, end []byte, iteration iterationFunc, opts ...func(*badger.IteratorOptions)) func(*badger.Txn) error { 337 return func(tx *badger.Txn) error { 338 339 // initialize the default options and comparison modifier for iteration 340 modifier := 1 341 options := badger.DefaultIteratorOptions 342 for _, apply := range opts { 343 apply(&options) 344 } 345 346 // In order to satisfy this function's prefix-wise inclusion semantics, 347 // we append 0xff bytes to the largest of start and end. 348 // This ensures Badger will seek to the largest key with that prefix 349 // for reverse iteration, thus including all keys with a prefix matching 350 // the starting key. It also enables us to detect boundary conditions by 351 // simple lexicographic comparison (ie. bytes.Compare) rather than 352 // explicitly comparing prefixes. 353 // 354 // See https://github.com/koko1123/flow-go-1/pull/3310#issuecomment-618127494 355 // for discussion and more detail on this. 356 357 // If start is bigger than end, we have a backwards iteration: 358 // 1) We set the reverse option on the iterator, so we step through all 359 // the keys backwards. This modifies the behaviour of Seek to go to 360 // the first key that is less than or equal to the start key (as 361 // opposed to greater than or equal in a regular iteration). 362 // 2) In order to satisfy this function's prefix-wise inclusion semantics, 363 // we append a 0xff-byte suffix to the start key so the seek will go 364 // to the right place. 365 // 3) For a regular iteration, we break the loop upon hitting the first 366 // item that has a key higher than the end prefix. In order to reverse 367 // this, we use a modifier for the comparison that reverses the check 368 // and makes it stop upon the first item lower than the end prefix. 369 if bytes.Compare(start, end) > 0 { 370 options.Reverse = true // make sure to go in reverse order 371 modifier = -1 // make sure to stop after end prefix 372 length := uint32(len(start)) 373 diff := max - length 374 for i := uint32(0); i < diff; i++ { 375 start = append(start, 0xff) 376 } 377 } else { 378 // for forward iteration, add the 0xff-bytes suffix to the end 379 // prefix, to ensure we include all keys with that prefix before 380 // finishing. 381 length := uint32(len(end)) 382 diff := max - length 383 for i := uint32(0); i < diff; i++ { 384 end = append(end, 0xff) 385 } 386 } 387 388 it := tx.NewIterator(options) 389 defer it.Close() 390 391 for it.Seek(start); it.Valid(); it.Next() { 392 393 item := it.Item() 394 395 key := item.Key() 396 // for forward iteration, check whether key > end, for backward 397 // iteration check whether key < end 398 if bytes.Compare(key, end)*modifier > 0 { 399 break 400 } 401 402 // initialize processing functions for iteration 403 check, create, handle := iteration() 404 405 // check if we should process the item at all 406 ok := check(key) 407 if !ok { 408 continue 409 } 410 411 // process the actual item 412 err := item.Value(func(val []byte) error { 413 414 // decode into the entity 415 entity := create() 416 err := msgpack.Unmarshal(val, entity) 417 if err != nil { 418 return fmt.Errorf("could not decode entity: %w", err) 419 } 420 421 // process the entity 422 err = handle() 423 if err != nil { 424 return fmt.Errorf("could not handle entity: %w", err) 425 } 426 427 return nil 428 }) 429 if err != nil { 430 return fmt.Errorf("could not process value: %w", err) 431 } 432 } 433 434 return nil 435 } 436 } 437 438 // traverse iterates over a range of keys defined by a prefix. 439 // 440 // The prefix must be shared by all keys in the iteration. 441 // 442 // On each iteration, it will call the iteration function to initialize 443 // functions specific to processing the given key-value pair. 444 func traverse(prefix []byte, iteration iterationFunc) func(*badger.Txn) error { 445 return func(tx *badger.Txn) error { 446 if len(prefix) == 0 { 447 return fmt.Errorf("prefix must not be empty") 448 } 449 450 opts := badger.DefaultIteratorOptions 451 // NOTE: this is an optimization only, it does not enforce that all 452 // results in the iteration have this prefix. 453 opts.Prefix = prefix 454 455 it := tx.NewIterator(opts) 456 defer it.Close() 457 458 // this is where we actually enforce that all results have the prefix 459 for it.Seek(prefix); it.ValidForPrefix(prefix); it.Next() { 460 461 item := it.Item() 462 463 // initialize processing functions for iteration 464 check, create, handle := iteration() 465 466 // check if we should process the item at all 467 key := item.Key() 468 ok := check(key) 469 if !ok { 470 continue 471 } 472 473 // process the actual item 474 err := item.Value(func(val []byte) error { 475 476 // decode into the entity 477 entity := create() 478 err := msgpack.Unmarshal(val, entity) 479 if err != nil { 480 return fmt.Errorf("could not decode entity: %w", err) 481 } 482 483 // process the entity 484 err = handle() 485 if err != nil { 486 return fmt.Errorf("could not handle entity: %w", err) 487 } 488 489 return nil 490 }) 491 if err != nil { 492 return fmt.Errorf("could not process value: %w", err) 493 } 494 } 495 496 return nil 497 } 498 } 499 500 // Fail returns a DB operation function that always fails with the given error. 501 func Fail(err error) func(*badger.Txn) error { 502 return func(_ *badger.Txn) error { 503 return err 504 } 505 }