github.com/badrootd/celestia-core@v0.0.0-20240305091328-aa4207a4b25d/state/txindex/kv/kv.go (about) 1 package kv 2 3 import ( 4 "bytes" 5 "context" 6 "encoding/hex" 7 "fmt" 8 "strconv" 9 "strings" 10 11 dbm "github.com/cometbft/cometbft-db" 12 "github.com/gogo/protobuf/proto" 13 14 abci "github.com/badrootd/celestia-core/abci/types" 15 "github.com/badrootd/celestia-core/libs/pubsub/query" 16 "github.com/badrootd/celestia-core/state/indexer" 17 "github.com/badrootd/celestia-core/state/txindex" 18 "github.com/badrootd/celestia-core/types" 19 ) 20 21 const ( 22 tagKeySeparator = "/" 23 eventSeqSeparator = "$es$" 24 ) 25 26 var _ txindex.TxIndexer = (*TxIndex)(nil) 27 28 // TxIndex is the simplest possible indexer, backed by key-value storage (levelDB). 29 type TxIndex struct { 30 store dbm.DB 31 // Number the events in the event list 32 eventSeq int64 33 } 34 35 // NewTxIndex creates new KV indexer. 36 func NewTxIndex(store dbm.DB) *TxIndex { 37 return &TxIndex{ 38 store: store, 39 } 40 } 41 42 // Get gets transaction from the TxIndex storage and returns it or nil if the 43 // transaction is not found. 44 func (txi *TxIndex) Get(hash []byte) (*abci.TxResult, error) { 45 if len(hash) == 0 { 46 return nil, txindex.ErrorEmptyHash 47 } 48 49 rawBytes, err := txi.store.Get(hash) 50 if err != nil { 51 panic(err) 52 } 53 if rawBytes == nil { 54 return nil, nil 55 } 56 57 txResult := new(abci.TxResult) 58 err = proto.Unmarshal(rawBytes, txResult) 59 if err != nil { 60 return nil, fmt.Errorf("error reading TxResult: %v", err) 61 } 62 63 return txResult, nil 64 } 65 66 // AddBatch indexes a batch of transactions using the given list of events. Each 67 // key that indexed from the tx's events is a composite of the event type and 68 // the respective attribute's key delimited by a "." (eg. "account.number"). 69 // Any event with an empty type is not indexed. 70 func (txi *TxIndex) AddBatch(b *txindex.Batch) error { 71 storeBatch := txi.store.NewBatch() 72 defer storeBatch.Close() 73 74 for _, result := range b.Ops { 75 err := txi.indexResult(storeBatch, result) 76 if err != nil { 77 return err 78 } 79 } 80 81 return storeBatch.WriteSync() 82 } 83 84 // Index indexes a single transaction using the given list of events. Each key 85 // that indexed from the tx's events is a composite of the event type and the 86 // respective attribute's key delimited by a "." (eg. "account.number"). 87 // Any event with an empty type is not indexed. 88 // 89 // If a transaction is indexed with the same hash as a previous transaction, it will 90 // be overwritten unless the tx result was NOT OK and the prior result was OK i.e. 91 // more transactions that successfully executed overwrite transactions that failed 92 // or successful yet older transactions. 93 func (txi *TxIndex) Index(result *abci.TxResult) error { 94 b := txi.store.NewBatch() 95 defer b.Close() 96 97 hash := types.Tx(result.Tx).Hash() 98 99 if !result.Result.IsOK() { 100 oldResult, err := txi.Get(hash) 101 if err != nil { 102 return err 103 } 104 105 // if the new transaction failed and it's already indexed in an older block and was successful 106 // we skip it as we want users to get the older successful transaction when they query. 107 if oldResult != nil && oldResult.Result.Code == abci.CodeTypeOK { 108 return nil 109 } 110 } 111 112 // index tx by events 113 err := txi.indexEvents(result, hash, b) 114 if err != nil { 115 return err 116 } 117 118 // index by height (always) 119 err = b.Set(keyForHeight(result), hash) 120 if err != nil { 121 return err 122 } 123 124 rawBytes, err := proto.Marshal(result) 125 if err != nil { 126 return err 127 } 128 // index by hash (always) 129 err = b.Set(hash, rawBytes) 130 if err != nil { 131 return err 132 } 133 134 return b.WriteSync() 135 } 136 137 func (txi *TxIndex) indexEvents(result *abci.TxResult, hash []byte, store dbm.Batch) error { 138 for _, event := range result.Result.Events { 139 txi.eventSeq = txi.eventSeq + 1 140 // only index events with a non-empty type 141 if len(event.Type) == 0 { 142 continue 143 } 144 145 for _, attr := range event.Attributes { 146 if len(attr.Key) == 0 { 147 continue 148 } 149 150 // index if `index: true` is set 151 compositeTag := fmt.Sprintf("%s.%s", event.Type, attr.Key) 152 // ensure event does not conflict with a reserved prefix key 153 if compositeTag == types.TxHashKey || compositeTag == types.TxHeightKey { 154 return fmt.Errorf("event type and attribute key \"%s\" is reserved; please use a different key", compositeTag) 155 } 156 if attr.GetIndex() { 157 err := store.Set(keyForEvent(compositeTag, attr.Value, result, txi.eventSeq), hash) 158 if err != nil { 159 return err 160 } 161 } 162 } 163 } 164 165 return nil 166 } 167 168 func (txi *TxIndex) indexResult(batch dbm.Batch, result *abci.TxResult) error { 169 hash := types.Tx(result.Tx).Hash() 170 171 rawBytes, err := proto.Marshal(result) 172 if err != nil { 173 return err 174 } 175 176 if !result.Result.IsOK() { 177 oldResult, err := txi.Get(hash) 178 if err != nil { 179 return err 180 } 181 182 // if the new transaction failed and it's already indexed in an older block and was successful 183 // we skip it as we want users to get the older successful transaction when they query. 184 if oldResult != nil && oldResult.Result.Code == abci.CodeTypeOK { 185 return nil 186 } 187 } 188 189 // index tx by events 190 err = txi.indexEvents(result, hash, batch) 191 if err != nil { 192 return err 193 } 194 195 // index by height (always) 196 err = batch.Set(keyForHeight(result), hash) 197 if err != nil { 198 return err 199 } 200 201 // index by hash (always) 202 err = batch.Set(hash, rawBytes) 203 if err != nil { 204 return err 205 } 206 return nil 207 } 208 209 // Search performs a search using the given query. 210 // 211 // It breaks the query into conditions (like "tx.height > 5"). For each 212 // condition, it queries the DB index. One special use cases here: (1) if 213 // "tx.hash" is found, it returns tx result for it (2) for range queries it is 214 // better for the client to provide both lower and upper bounds, so we are not 215 // performing a full scan. Results from querying indexes are then intersected 216 // and returned to the caller, in no particular order. 217 // 218 // Search will exit early and return any result fetched so far, 219 // when a message is received on the context chan. 220 func (txi *TxIndex) Search(ctx context.Context, q *query.Query) ([]*abci.TxResult, error) { 221 select { 222 case <-ctx.Done(): 223 return make([]*abci.TxResult, 0), nil 224 225 default: 226 } 227 228 var hashesInitialized bool 229 filteredHashes := make(map[string][]byte) 230 231 // get a list of conditions (like "tx.height > 5") 232 conditions, err := q.Conditions() 233 if err != nil { 234 return nil, fmt.Errorf("error during parsing conditions from query: %w", err) 235 } 236 237 // if there is a hash condition, return the result immediately 238 hash, ok, err := lookForHash(conditions) 239 if err != nil { 240 return nil, fmt.Errorf("error during searching for a hash in the query: %w", err) 241 } else if ok { 242 res, err := txi.Get(hash) 243 switch { 244 case err != nil: 245 return []*abci.TxResult{}, fmt.Errorf("error while retrieving the result: %w", err) 246 case res == nil: 247 return []*abci.TxResult{}, nil 248 default: 249 return []*abci.TxResult{res}, nil 250 } 251 } 252 253 var matchEvents bool 254 var matchEventIdx int 255 256 // If the match.events keyword is at the beginning of the query, we will only 257 // return heights where the conditions are true within the same event 258 // and set the matchEvents to true 259 conditions, matchEvents = dedupMatchEvents(conditions) 260 // conditions to skip because they're handled before "everything else" 261 skipIndexes := make([]int, 0) 262 263 if matchEvents { 264 matchEventIdx = 0 265 skipIndexes = append(skipIndexes, matchEventIdx) 266 } 267 268 // if there is a height condition ("tx.height=3"), extract it 269 // var height int64 270 // var heightIdx int 271 var heightInfo HeightInfo 272 if matchEvents { 273 // If we are not matching events and tx.height = 3 occurs more than once, the later value will 274 // overwrite the first one. For match.events it will create problems. 275 conditions, heightInfo = dedupHeight(conditions) 276 } else { 277 heightInfo.height, heightInfo.heightEqIdx = lookForHeight(conditions) 278 } 279 if matchEvents && !heightInfo.onlyHeightEq { 280 skipIndexes = append(skipIndexes, heightInfo.heightEqIdx) 281 } 282 // extract ranges 283 // if both upper and lower bounds exist, it's better to get them in order not 284 // no iterate over kvs that are not within range. 285 //If we have a query range over height and want to still look for 286 // specific event values we do not want to simply return all 287 // transactios in this height range. We remember the height range info 288 // and pass it on to match() to take into account when processing events. 289 ranges, rangeIndexes, heightRange := indexer.LookForRangesWithHeight(conditions) 290 heightInfo.heightRange = heightRange 291 292 if len(ranges) > 0 { 293 skipIndexes = append(skipIndexes, rangeIndexes...) 294 295 for _, qr := range ranges { 296 297 // If we have additional constraints and want to query per event 298 // attributes, we cannot simply return all blocks for a height. 299 // But we remember the height we want to find and forward it to 300 // match(). If we only have the height constraint and match.events keyword 301 // in the query (the second part of the ||), we don't need to query 302 // per event conditions and return all events within the height range. 303 if qr.Key == types.TxHeightKey && matchEvents && !heightInfo.onlyHeightRange { 304 continue 305 } 306 if !hashesInitialized { 307 filteredHashes = txi.matchRange(ctx, qr, startKey(qr.Key), filteredHashes, true, matchEvents, heightInfo) 308 hashesInitialized = true 309 310 // Ignore any remaining conditions if the first condition resulted 311 // in no matches (assuming implicit AND operand). 312 if len(filteredHashes) == 0 { 313 break 314 } 315 } else { 316 filteredHashes = txi.matchRange(ctx, qr, startKey(qr.Key), filteredHashes, false, matchEvents, heightInfo) 317 } 318 } 319 } 320 321 // for all other conditions 322 for i, c := range conditions { 323 if intInSlice(i, skipIndexes) { 324 continue 325 } 326 327 if !hashesInitialized { 328 filteredHashes = txi.match(ctx, c, startKeyForCondition(c, heightInfo.height), filteredHashes, true, matchEvents, heightInfo) 329 hashesInitialized = true 330 331 // Ignore any remaining conditions if the first condition resulted 332 // in no matches (assuming implicit AND operand). 333 if len(filteredHashes) == 0 { 334 break 335 } 336 } else { 337 filteredHashes = txi.match(ctx, c, startKeyForCondition(c, heightInfo.height), filteredHashes, false, matchEvents, heightInfo) 338 } 339 } 340 341 results := make([]*abci.TxResult, 0, len(filteredHashes)) 342 resultMap := make(map[string]struct{}) 343 RESULTS_LOOP: 344 for _, h := range filteredHashes { 345 res, err := txi.Get(h) 346 if err != nil { 347 return nil, fmt.Errorf("failed to get Tx{%X}: %w", h, err) 348 } 349 hashString := string(h) 350 if _, ok := resultMap[hashString]; !ok { 351 resultMap[hashString] = struct{}{} 352 results = append(results, res) 353 } 354 // Potentially exit early. 355 select { 356 case <-ctx.Done(): 357 break RESULTS_LOOP 358 default: 359 } 360 } 361 362 return results, nil 363 } 364 365 func lookForHash(conditions []query.Condition) (hash []byte, ok bool, err error) { 366 for _, c := range conditions { 367 if c.CompositeKey == types.TxHashKey { 368 decoded, err := hex.DecodeString(c.Operand.(string)) 369 return decoded, true, err 370 } 371 } 372 return 373 } 374 375 // lookForHeight returns a height if there is an "height=X" condition. 376 func lookForHeight(conditions []query.Condition) (height int64, heightIdx int) { 377 for i, c := range conditions { 378 if c.CompositeKey == types.TxHeightKey && c.Op == query.OpEqual { 379 return c.Operand.(int64), i 380 } 381 } 382 return 0, -1 383 } 384 func (txi *TxIndex) setTmpHashes(tmpHeights map[string][]byte, it dbm.Iterator, matchEvents bool) { 385 if matchEvents { 386 eventSeq := extractEventSeqFromKey(it.Key()) 387 tmpHeights[string(it.Value())+eventSeq] = it.Value() 388 } else { 389 tmpHeights[string(it.Value())] = it.Value() 390 } 391 } 392 393 // match returns all matching txs by hash that meet a given condition and start 394 // key. An already filtered result (filteredHashes) is provided such that any 395 // non-intersecting matches are removed. 396 // 397 // NOTE: filteredHashes may be empty if no previous condition has matched. 398 func (txi *TxIndex) match( 399 ctx context.Context, 400 c query.Condition, 401 startKeyBz []byte, 402 filteredHashes map[string][]byte, 403 firstRun bool, 404 matchEvents bool, 405 heightInfo HeightInfo, 406 ) map[string][]byte { 407 // A previous match was attempted but resulted in no matches, so we return 408 // no matches (assuming AND operand). 409 if !firstRun && len(filteredHashes) == 0 { 410 return filteredHashes 411 } 412 413 tmpHashes := make(map[string][]byte) 414 415 switch c.Op { 416 case query.OpEqual: 417 it, err := dbm.IteratePrefix(txi.store, startKeyBz) 418 if err != nil { 419 panic(err) 420 } 421 defer it.Close() 422 423 EQ_LOOP: 424 for ; it.Valid(); it.Next() { 425 426 // If we have a height range in a query, we need only transactions 427 // for this height 428 429 if matchEvents { 430 keyHeight, err := extractHeightFromKey(it.Key()) 431 if err != nil || !checkHeightConditions(heightInfo, keyHeight) { 432 continue 433 } 434 435 } 436 437 txi.setTmpHashes(tmpHashes, it, matchEvents) 438 // Potentially exit early. 439 select { 440 case <-ctx.Done(): 441 break EQ_LOOP 442 default: 443 } 444 } 445 if err := it.Error(); err != nil { 446 panic(err) 447 } 448 449 case query.OpExists: 450 // XXX: can't use startKeyBz here because c.Operand is nil 451 // (e.g. "account.owner/<nil>/" won't match w/ a single row) 452 it, err := dbm.IteratePrefix(txi.store, startKey(c.CompositeKey)) 453 if err != nil { 454 panic(err) 455 } 456 defer it.Close() 457 458 EXISTS_LOOP: 459 for ; it.Valid(); it.Next() { 460 if matchEvents { 461 keyHeight, err := extractHeightFromKey(it.Key()) 462 if err != nil || !checkHeightConditions(heightInfo, keyHeight) { 463 continue 464 } 465 466 } 467 txi.setTmpHashes(tmpHashes, it, matchEvents) 468 469 // Potentially exit early. 470 select { 471 case <-ctx.Done(): 472 break EXISTS_LOOP 473 default: 474 } 475 } 476 if err := it.Error(); err != nil { 477 panic(err) 478 } 479 480 case query.OpContains: 481 // XXX: startKey does not apply here. 482 // For example, if startKey = "account.owner/an/" and search query = "account.owner CONTAINS an" 483 // we can't iterate with prefix "account.owner/an/" because we might miss keys like "account.owner/Ulan/" 484 it, err := dbm.IteratePrefix(txi.store, startKey(c.CompositeKey)) 485 if err != nil { 486 panic(err) 487 } 488 defer it.Close() 489 490 CONTAINS_LOOP: 491 for ; it.Valid(); it.Next() { 492 if !isTagKey(it.Key()) { 493 continue 494 } 495 496 if strings.Contains(extractValueFromKey(it.Key()), c.Operand.(string)) { 497 if matchEvents { 498 keyHeight, err := extractHeightFromKey(it.Key()) 499 if err != nil || !checkHeightConditions(heightInfo, keyHeight) { 500 continue 501 } 502 } 503 txi.setTmpHashes(tmpHashes, it, matchEvents) 504 } 505 506 // Potentially exit early. 507 select { 508 case <-ctx.Done(): 509 break CONTAINS_LOOP 510 default: 511 } 512 } 513 if err := it.Error(); err != nil { 514 panic(err) 515 } 516 default: 517 panic("other operators should be handled already") 518 } 519 520 if len(tmpHashes) == 0 || firstRun { 521 // Either: 522 // 523 // 1. Regardless if a previous match was attempted, which may have had 524 // results, but no match was found for the current condition, then we 525 // return no matches (assuming AND operand). 526 // 527 // 2. A previous match was not attempted, so we return all results. 528 return tmpHashes 529 } 530 531 // Remove/reduce matches in filteredHashes that were not found in this 532 // match (tmpHashes). 533 REMOVE_LOOP: 534 for k, v := range filteredHashes { 535 tmpHash := tmpHashes[k] 536 if tmpHash == nil || !bytes.Equal(tmpHash, v) { 537 delete(filteredHashes, k) 538 539 // Potentially exit early. 540 select { 541 case <-ctx.Done(): 542 break REMOVE_LOOP 543 default: 544 } 545 } 546 } 547 548 return filteredHashes 549 } 550 551 // matchRange returns all matching txs by hash that meet a given queryRange and 552 // start key. An already filtered result (filteredHashes) is provided such that 553 // any non-intersecting matches are removed. 554 // 555 // NOTE: filteredHashes may be empty if no previous condition has matched. 556 func (txi *TxIndex) matchRange( 557 ctx context.Context, 558 qr indexer.QueryRange, 559 startKey []byte, 560 filteredHashes map[string][]byte, 561 firstRun bool, 562 matchEvents bool, 563 heightInfo HeightInfo, 564 ) map[string][]byte { 565 // A previous match was attempted but resulted in no matches, so we return 566 // no matches (assuming AND operand). 567 if !firstRun && len(filteredHashes) == 0 { 568 return filteredHashes 569 } 570 571 tmpHashes := make(map[string][]byte) 572 573 it, err := dbm.IteratePrefix(txi.store, startKey) 574 if err != nil { 575 panic(err) 576 } 577 defer it.Close() 578 579 LOOP: 580 for ; it.Valid(); it.Next() { 581 if !isTagKey(it.Key()) { 582 continue 583 } 584 585 if _, ok := qr.AnyBound().(int64); ok { 586 v, err := strconv.ParseInt(extractValueFromKey(it.Key()), 10, 64) 587 if err != nil { 588 continue LOOP 589 } 590 591 if matchEvents && qr.Key != types.TxHeightKey { 592 keyHeight, err := extractHeightFromKey(it.Key()) 593 if err != nil || !checkHeightConditions(heightInfo, keyHeight) { 594 continue LOOP 595 } 596 } 597 if checkBounds(qr, v) { 598 txi.setTmpHashes(tmpHashes, it, matchEvents) 599 } 600 601 // XXX: passing time in a ABCI Events is not yet implemented 602 // case time.Time: 603 // v := strconv.ParseInt(extractValueFromKey(it.Key()), 10, 64) 604 // if v == r.upperBound { 605 // break 606 // } 607 } 608 609 // Potentially exit early. 610 select { 611 case <-ctx.Done(): 612 break LOOP 613 default: 614 } 615 } 616 if err := it.Error(); err != nil { 617 panic(err) 618 } 619 620 if len(tmpHashes) == 0 || firstRun { 621 // Either: 622 // 623 // 1. Regardless if a previous match was attempted, which may have had 624 // results, but no match was found for the current condition, then we 625 // return no matches (assuming AND operand). 626 // 627 // 2. A previous match was not attempted, so we return all results. 628 return tmpHashes 629 } 630 631 // Remove/reduce matches in filteredHashes that were not found in this 632 // match (tmpHashes). 633 REMOVE_LOOP: 634 for k, v := range filteredHashes { 635 tmpHash := tmpHashes[k] 636 if tmpHash == nil || !bytes.Equal(tmpHashes[k], v) { 637 delete(filteredHashes, k) 638 639 // Potentially exit early. 640 select { 641 case <-ctx.Done(): 642 break REMOVE_LOOP 643 default: 644 } 645 } 646 } 647 648 return filteredHashes 649 } 650 651 // Keys 652 653 func isTagKey(key []byte) bool { 654 // Normally, if the event was indexed with an event sequence, the number of 655 // tags should 4. Alternatively it should be 3 if the event was not indexed 656 // with the corresponding event sequence. However, some attribute values in 657 // production can contain the tag separator. Therefore, the condition is >= 3. 658 numTags := strings.Count(string(key), tagKeySeparator) 659 return numTags >= 3 660 } 661 662 func extractHeightFromKey(key []byte) (int64, error) { 663 parts := strings.SplitN(string(key), tagKeySeparator, -1) 664 return strconv.ParseInt(parts[len(parts)-2], 10, 64) 665 } 666 func extractValueFromKey(key []byte) string { 667 keyString := string(key) 668 parts := strings.SplitN(keyString, tagKeySeparator, -1) 669 partsLen := len(parts) 670 value := strings.TrimPrefix(keyString, parts[0]+tagKeySeparator) 671 672 suffix := "" 673 suffixLen := 2 674 675 for i := 1; i <= suffixLen; i++ { 676 suffix = tagKeySeparator + parts[partsLen-i] + suffix 677 } 678 return strings.TrimSuffix(value, suffix) 679 } 680 681 func extractEventSeqFromKey(key []byte) string { 682 parts := strings.SplitN(string(key), tagKeySeparator, -1) 683 684 lastEl := parts[len(parts)-1] 685 686 if strings.Contains(lastEl, eventSeqSeparator) { 687 return strings.SplitN(lastEl, eventSeqSeparator, 2)[1] 688 } 689 return "0" 690 } 691 func keyForEvent(key string, value string, result *abci.TxResult, eventSeq int64) []byte { 692 return []byte(fmt.Sprintf("%s/%s/%d/%d%s", 693 key, 694 value, 695 result.Height, 696 result.Index, 697 eventSeqSeparator+strconv.FormatInt(eventSeq, 10), 698 )) 699 } 700 701 func keyForHeight(result *abci.TxResult) []byte { 702 return []byte(fmt.Sprintf("%s/%d/%d/%d%s", 703 types.TxHeightKey, 704 result.Height, 705 result.Height, 706 result.Index, 707 // Added to facilitate having the eventSeq in event keys 708 // Otherwise queries break expecting 5 entries 709 eventSeqSeparator+"0", 710 )) 711 } 712 713 func startKeyForCondition(c query.Condition, height int64) []byte { 714 if height > 0 { 715 return startKey(c.CompositeKey, c.Operand, height) 716 } 717 return startKey(c.CompositeKey, c.Operand) 718 } 719 720 func startKey(fields ...interface{}) []byte { 721 var b bytes.Buffer 722 for _, f := range fields { 723 b.Write([]byte(fmt.Sprintf("%v", f) + tagKeySeparator)) 724 } 725 return b.Bytes() 726 } 727 728 func checkBounds(ranges indexer.QueryRange, v int64) bool { 729 include := true 730 lowerBound := ranges.LowerBoundValue() 731 upperBound := ranges.UpperBoundValue() 732 if lowerBound != nil && v < lowerBound.(int64) { 733 include = false 734 } 735 736 if upperBound != nil && v > upperBound.(int64) { 737 include = false 738 } 739 740 return include 741 }