github.com/decred/dcrd/blockchain@v1.2.1/indexers/txindex.go (about) 1 // Copyright (c) 2016 The btcsuite developers 2 // Copyright (c) 2016-2017 The Decred developers 3 // Use of this source code is governed by an ISC 4 // license that can be found in the LICENSE file. 5 6 package indexers 7 8 import ( 9 "errors" 10 "fmt" 11 12 "github.com/decred/dcrd/blockchain" 13 "github.com/decred/dcrd/chaincfg/chainhash" 14 "github.com/decred/dcrd/database" 15 "github.com/decred/dcrd/dcrutil" 16 "github.com/decred/dcrd/wire" 17 ) 18 19 const ( 20 // txIndexName is the human-readable name for the index. 21 txIndexName = "transaction index" 22 23 // txIndexVersion is the current version of the transaction index. 24 txIndexVersion = 2 25 26 // txEntrySize is the size of a transaction entry. It consists of 4 27 // bytes block id + 4 bytes offset + 4 bytes length + 4 bytes block 28 // index. 29 txEntrySize = 4 + 4 + 4 + 4 30 ) 31 32 var ( 33 // txIndexKey is the key of the transaction index and the db bucket used 34 // to house it. 35 txIndexKey = []byte("txbyhashidx") 36 37 // idByHashIndexBucketName is the name of the db bucket used to house 38 // the block id -> block hash index. 39 idByHashIndexBucketName = []byte("idbyhashidx") 40 41 // hashByIDIndexBucketName is the name of the db bucket used to house 42 // the block hash -> block id index. 43 hashByIDIndexBucketName = []byte("hashbyididx") 44 45 // errNoBlockIDEntry is an error that indicates a requested entry does 46 // not exist in the block ID index. 47 errNoBlockIDEntry = errors.New("no entry in the block ID index") 48 ) 49 50 // ----------------------------------------------------------------------------- 51 // The transaction index consists of an entry for every transaction in the main 52 // chain. In order to significantly optimize the space requirements a separate 53 // index which provides an internal mapping between each block that has been 54 // indexed and a unique ID for use within the hash to location mappings. The ID 55 // is simply a sequentially incremented uint32. This is useful because it is 56 // only 4 bytes versus 32 bytes hashes and thus saves a ton of space in the 57 // index. 58 // 59 // There are three buckets used in total. The first bucket maps the hash of 60 // each transaction to the specific block location. The second bucket maps the 61 // hash of each block to the unique ID and the third maps that ID back to the 62 // block hash. 63 // 64 // NOTE: Although it is technically possible for multiple transactions to have 65 // the same hash as long as the previous transaction with the same hash is fully 66 // spent, this code only stores the most recent one because doing otherwise 67 // would add a non-trivial amount of space and overhead for something that will 68 // realistically never happen per the probability and even if it did, the old 69 // one must be fully spent and so the most likely transaction a caller would 70 // want for a given hash is the most recent one anyways. 71 // 72 // The serialized format for keys and values in the block hash to ID bucket is: 73 // <hash> = <ID> 74 // 75 // Field Type Size 76 // hash chainhash.Hash 32 bytes 77 // ID uint32 4 bytes 78 // ----- 79 // Total: 36 bytes 80 // 81 // The serialized format for keys and values in the ID to block hash bucket is: 82 // <ID> = <hash> 83 // 84 // Field Type Size 85 // ID uint32 4 bytes 86 // hash chainhash.Hash 32 bytes 87 // ----- 88 // Total: 36 bytes 89 // 90 // The serialized format for the keys and values in the tx index bucket is: 91 // 92 // <txhash> = <block id><start offset><tx length><block index> 93 // 94 // Field Type Size 95 // txhash chainhash.Hash 32 bytes 96 // block id uint32 4 bytes 97 // start offset uint32 4 bytes 98 // tx length uint32 4 bytes 99 // block index uint32 4 bytes 100 // ----- 101 // Total: 48 bytes 102 // ----------------------------------------------------------------------------- 103 104 // TxIndexEntry houses information about an entry in the transaction index. 105 type TxIndexEntry struct { 106 // BlockRegion specifies the location of the raw bytes of the transaction. 107 BlockRegion database.BlockRegion 108 109 // BlockIndex species the index of the transaction within the array of 110 // transactions that comprise a tree of the block. 111 BlockIndex uint32 112 } 113 114 // dbPutBlockIDIndexEntry uses an existing database transaction to update or add 115 // the index entries for the hash to id and id to hash mappings for the provided 116 // values. 117 func dbPutBlockIDIndexEntry(dbTx database.Tx, hash *chainhash.Hash, id uint32) error { 118 // Serialize the height for use in the index entries. 119 var serializedID [4]byte 120 byteOrder.PutUint32(serializedID[:], id) 121 122 // Add the block hash to ID mapping to the index. 123 meta := dbTx.Metadata() 124 hashIndex := meta.Bucket(idByHashIndexBucketName) 125 if err := hashIndex.Put(hash[:], serializedID[:]); err != nil { 126 return err 127 } 128 129 // Add the block ID to hash mapping to the index. 130 idIndex := meta.Bucket(hashByIDIndexBucketName) 131 return idIndex.Put(serializedID[:], hash[:]) 132 } 133 134 // dbRemoveBlockIDIndexEntry uses an existing database transaction remove index 135 // entries from the hash to id and id to hash mappings for the provided hash. 136 func dbRemoveBlockIDIndexEntry(dbTx database.Tx, hash *chainhash.Hash) error { 137 // Remove the block hash to ID mapping. 138 meta := dbTx.Metadata() 139 hashIndex := meta.Bucket(idByHashIndexBucketName) 140 serializedID := hashIndex.Get(hash[:]) 141 if serializedID == nil { 142 return nil 143 } 144 if err := hashIndex.Delete(hash[:]); err != nil { 145 return err 146 } 147 148 // Remove the block ID to hash mapping. 149 idIndex := meta.Bucket(hashByIDIndexBucketName) 150 return idIndex.Delete(serializedID) 151 } 152 153 // dbFetchBlockIDByHash uses an existing database transaction to retrieve the 154 // block id for the provided hash from the index. 155 func dbFetchBlockIDByHash(dbTx database.Tx, hash *chainhash.Hash) (uint32, error) { 156 hashIndex := dbTx.Metadata().Bucket(idByHashIndexBucketName) 157 serializedID := hashIndex.Get(hash[:]) 158 if serializedID == nil { 159 return 0, errNoBlockIDEntry 160 } 161 162 return byteOrder.Uint32(serializedID), nil 163 } 164 165 // dbFetchBlockHashBySerializedID uses an existing database transaction to 166 // retrieve the hash for the provided serialized block id from the index. 167 func dbFetchBlockHashBySerializedID(dbTx database.Tx, serializedID []byte) (*chainhash.Hash, error) { 168 idIndex := dbTx.Metadata().Bucket(hashByIDIndexBucketName) 169 hashBytes := idIndex.Get(serializedID) 170 if hashBytes == nil { 171 return nil, errNoBlockIDEntry 172 } 173 174 var hash chainhash.Hash 175 copy(hash[:], hashBytes) 176 return &hash, nil 177 } 178 179 // dbFetchBlockHashByID uses an existing database transaction to retrieve the 180 // hash for the provided block id from the index. 181 func dbFetchBlockHashByID(dbTx database.Tx, id uint32) (*chainhash.Hash, error) { 182 var serializedID [4]byte 183 byteOrder.PutUint32(serializedID[:], id) 184 return dbFetchBlockHashBySerializedID(dbTx, serializedID[:]) 185 } 186 187 // putTxIndexEntry serializes the provided values according to the format 188 // described about for a transaction index entry. The target byte slice must 189 // be at least large enough to handle the number of bytes defined by the 190 // txEntrySize constant or it will panic. 191 func putTxIndexEntry(target []byte, blockID uint32, txLoc wire.TxLoc, blockIndex uint32) { 192 byteOrder.PutUint32(target, blockID) 193 byteOrder.PutUint32(target[4:], uint32(txLoc.TxStart)) 194 byteOrder.PutUint32(target[8:], uint32(txLoc.TxLen)) 195 byteOrder.PutUint32(target[12:], blockIndex) 196 } 197 198 // dbPutTxIndexEntry uses an existing database transaction to update the 199 // transaction index given the provided serialized data that is expected to have 200 // been serialized putTxIndexEntry. 201 func dbPutTxIndexEntry(dbTx database.Tx, txHash *chainhash.Hash, serializedData []byte) error { 202 txIndex := dbTx.Metadata().Bucket(txIndexKey) 203 return txIndex.Put(txHash[:], serializedData) 204 } 205 206 // dbFetchTxIndexEntry uses an existing database transaction to fetch the block 207 // region for the provided transaction hash from the transaction index. When 208 // there is no entry for the provided hash, nil will be returned for the both 209 // the region and the error. 210 func dbFetchTxIndexEntry(dbTx database.Tx, txHash *chainhash.Hash) (*TxIndexEntry, error) { 211 // Load the record from the database and return now if it doesn't exist. 212 txIndex := dbTx.Metadata().Bucket(txIndexKey) 213 serializedData := txIndex.Get(txHash[:]) 214 if len(serializedData) == 0 { 215 return nil, nil 216 } 217 218 // Ensure the serialized data has enough bytes to properly deserialize. 219 if len(serializedData) < txEntrySize { 220 return nil, database.Error{ 221 ErrorCode: database.ErrCorruption, 222 Description: fmt.Sprintf("corrupt transaction index "+ 223 "entry for %s", txHash), 224 } 225 } 226 227 // Load the block hash associated with the block ID. 228 hash, err := dbFetchBlockHashBySerializedID(dbTx, serializedData[0:4]) 229 if err != nil { 230 return nil, database.Error{ 231 ErrorCode: database.ErrCorruption, 232 Description: fmt.Sprintf("corrupt transaction index "+ 233 "entry for %s: %v", txHash, err), 234 } 235 } 236 237 // Deserialize the final entry. 238 entry := TxIndexEntry{ 239 BlockRegion: database.BlockRegion{ 240 Hash: new(chainhash.Hash), 241 Offset: byteOrder.Uint32(serializedData[4:8]), 242 Len: byteOrder.Uint32(serializedData[8:12]), 243 }, 244 BlockIndex: byteOrder.Uint32(serializedData[12:16]), 245 } 246 copy(entry.BlockRegion.Hash[:], hash[:]) 247 return &entry, nil 248 } 249 250 // dbAddTxIndexEntries uses an existing database transaction to add a 251 // transaction index entry for every transaction in the parent of the passed 252 // block (if they were valid) and every stake transaction in the passed block. 253 func dbAddTxIndexEntries(dbTx database.Tx, block *dcrutil.Block, blockID uint32) error { 254 // The offset and length of the transactions within the serialized block. 255 txLocs, stakeTxLocs, err := block.TxLoc() 256 if err != nil { 257 return err 258 } 259 260 // As an optimization, allocate a single slice big enough to hold all 261 // of the serialized transaction index entries for the block and 262 // serialize them directly into the slice. Then, pass the appropriate 263 // subslice to the database to be written. This approach significantly 264 // cuts down on the number of required allocations. 265 addEntries := func(txns []*dcrutil.Tx, txLocs []wire.TxLoc, blockID uint32) error { 266 offset := 0 267 serializedValues := make([]byte, len(txns)*txEntrySize) 268 for i, tx := range txns { 269 putTxIndexEntry(serializedValues[offset:], blockID, txLocs[i], 270 uint32(i)) 271 endOffset := offset + txEntrySize 272 err := dbPutTxIndexEntry(dbTx, tx.Hash(), 273 serializedValues[offset:endOffset:endOffset]) 274 if err != nil { 275 return err 276 } 277 offset += txEntrySize 278 } 279 return nil 280 } 281 282 // Add the regular tree transactions. 283 err = addEntries(block.Transactions(), txLocs, blockID) 284 if err != nil { 285 return err 286 } 287 288 // Add the stake tree transactions. 289 return addEntries(block.STransactions(), stakeTxLocs, blockID) 290 } 291 292 // dbRemoveTxIndexEntry uses an existing database transaction to remove the most 293 // recent transaction index entry for the given hash. 294 func dbRemoveTxIndexEntry(dbTx database.Tx, txHash *chainhash.Hash) error { 295 txIndex := dbTx.Metadata().Bucket(txIndexKey) 296 serializedData := txIndex.Get(txHash[:]) 297 if len(serializedData) == 0 { 298 return fmt.Errorf("can't remove non-existent transaction %s "+ 299 "from the transaction index", txHash) 300 } 301 302 return txIndex.Delete(txHash[:]) 303 } 304 305 // dbRemoveTxIndexEntries uses an existing database transaction to remove the 306 // latest transaction entry for every transaction in the parent of the passed 307 // block (if they were valid) and every stake transaction in the passed block. 308 func dbRemoveTxIndexEntries(dbTx database.Tx, block *dcrutil.Block) error { 309 removeEntries := func(txns []*dcrutil.Tx) error { 310 for _, tx := range txns { 311 err := dbRemoveTxIndexEntry(dbTx, tx.Hash()) 312 if err != nil { 313 return err 314 } 315 } 316 return nil 317 } 318 319 // Remove the regular and stake tree transactions from the block being 320 // disconnected. 321 if err := removeEntries(block.Transactions()); err != nil { 322 return err 323 } 324 return removeEntries(block.STransactions()) 325 } 326 327 // TxIndex implements a transaction by hash index. That is to say, it supports 328 // querying all transactions by their hash. 329 type TxIndex struct { 330 db database.DB 331 curBlockID uint32 332 } 333 334 // Ensure the TxIndex type implements the Indexer interface. 335 var _ Indexer = (*TxIndex)(nil) 336 337 // Init initializes the hash-based transaction index. In particular, it finds 338 // the highest used block ID and stores it for later use when connecting or 339 // disconnecting blocks. 340 // 341 // This is part of the Indexer interface. 342 func (idx *TxIndex) Init() error { 343 // Find the latest known block id field for the internal block id 344 // index and initialize it. This is done because it's a lot more 345 // efficient to do a single search at initialize time than it is to 346 // write another value to the database on every update. 347 err := idx.db.View(func(dbTx database.Tx) error { 348 // Scan forward in large gaps to find a block id that doesn't 349 // exist yet to serve as an upper bound for the binary search 350 // below. 351 var highestKnown, nextUnknown uint32 352 testBlockID := uint32(1) 353 increment := uint32(100000) 354 for { 355 _, err := dbFetchBlockHashByID(dbTx, testBlockID) 356 if err != nil { 357 nextUnknown = testBlockID 358 break 359 } 360 361 highestKnown = testBlockID 362 testBlockID += increment 363 } 364 log.Tracef("Forward scan (highest known %d, next unknown %d)", 365 highestKnown, nextUnknown) 366 367 // No used block IDs due to new database. 368 if nextUnknown == 1 { 369 return nil 370 } 371 372 // Use a binary search to find the final highest used block id. 373 // This will take at most ceil(log_2(increment)) attempts. 374 for { 375 testBlockID = (highestKnown + nextUnknown) / 2 376 _, err := dbFetchBlockHashByID(dbTx, testBlockID) 377 if err != nil { 378 nextUnknown = testBlockID 379 } else { 380 highestKnown = testBlockID 381 } 382 log.Tracef("Binary scan (highest known %d, next "+ 383 "unknown %d)", highestKnown, nextUnknown) 384 if highestKnown+1 == nextUnknown { 385 break 386 } 387 } 388 389 idx.curBlockID = highestKnown 390 return nil 391 }) 392 if err != nil { 393 return err 394 } 395 396 log.Debugf("Current internal block ID: %d", idx.curBlockID) 397 return nil 398 } 399 400 // Key returns the database key to use for the index as a byte slice. 401 // 402 // This is part of the Indexer interface. 403 func (idx *TxIndex) Key() []byte { 404 return txIndexKey 405 } 406 407 // Name returns the human-readable name of the index. 408 // 409 // This is part of the Indexer interface. 410 func (idx *TxIndex) Name() string { 411 return txIndexName 412 } 413 414 // Version returns the current version of the index. 415 // 416 // This is part of the Indexer interface. 417 func (idx *TxIndex) Version() uint32 { 418 return txIndexVersion 419 } 420 421 // Create is invoked when the indexer manager determines the index needs 422 // to be created for the first time. It creates the buckets for the hash-based 423 // transaction index and the internal block ID indexes. 424 // 425 // This is part of the Indexer interface. 426 func (idx *TxIndex) Create(dbTx database.Tx) error { 427 meta := dbTx.Metadata() 428 if _, err := meta.CreateBucket(idByHashIndexBucketName); err != nil { 429 return err 430 } 431 if _, err := meta.CreateBucket(hashByIDIndexBucketName); err != nil { 432 return err 433 } 434 _, err := meta.CreateBucket(txIndexKey) 435 return err 436 } 437 438 // ConnectBlock is invoked by the index manager when a new block has been 439 // connected to the main chain. This indexer adds a hash-to-transaction mapping 440 // for every transaction in the passed block. 441 // 442 // This is part of the Indexer interface. 443 func (idx *TxIndex) ConnectBlock(dbTx database.Tx, block, parent *dcrutil.Block, view *blockchain.UtxoViewpoint) error { 444 // NOTE: The fact that the block can disapprove the regular tree of the 445 // previous block is ignored for this index because even though the 446 // disapproved transactions no longer apply spend semantics, they still 447 // exist within the block and thus have to be processed before the next 448 // block disapproves them. 449 // 450 // Also, the transaction index is keyed by hash and only supports a single 451 // transaction per hash. This means that if the disapproved transaction 452 // is mined into a later block, as is typically the case, only that most 453 // recent one can be queried. Ideally, it should probably support multiple 454 // transactions per hash, which would not only allow access in the case 455 // just described, but it would also allow indexing of transactions that 456 // happen to have the same hash (granted the probability of this is 457 // extremely low), which is supported so long as the previous one is 458 // fully spent. 459 460 // Increment the internal block ID to use for the block being connected 461 // and add all of the transactions in the block to the index. 462 newBlockID := idx.curBlockID + 1 463 if err := dbAddTxIndexEntries(dbTx, block, newBlockID); err != nil { 464 return err 465 } 466 467 // Add the new block ID index entry for the block being connected and 468 // update the current internal block ID accordingly. 469 err := dbPutBlockIDIndexEntry(dbTx, block.Hash(), newBlockID) 470 if err != nil { 471 return err 472 } 473 idx.curBlockID = newBlockID 474 return nil 475 } 476 477 // DisconnectBlock is invoked by the index manager when a block has been 478 // disconnected from the main chain. This indexer removes the 479 // hash-to-transaction mapping for every transaction in the block. 480 // 481 // This is part of the Indexer interface. 482 func (idx *TxIndex) DisconnectBlock(dbTx database.Tx, block, parent *dcrutil.Block, view *blockchain.UtxoViewpoint) error { 483 // NOTE: The fact that the block can disapprove the regular tree of the 484 // previous block is ignored when disconnecting blocks because it is also 485 // ignored when connecting the block. See the comments in ConnectBlock for 486 // the specifics. 487 488 // Remove all of the transactions in the block from the index. 489 if err := dbRemoveTxIndexEntries(dbTx, block); err != nil { 490 return err 491 } 492 493 // Remove the block ID index entry for the block being disconnected and 494 // decrement the current internal block ID to account for it. 495 if err := dbRemoveBlockIDIndexEntry(dbTx, block.Hash()); err != nil { 496 return err 497 } 498 idx.curBlockID-- 499 return nil 500 } 501 502 // Entry returns details for the provided transaction hash from the transaction 503 // index. The block region contained in the result can in turn be used to load 504 // the raw transaction bytes. When there is no entry for the provided hash, nil 505 // will be returned for the both the entry and the error. 506 // 507 // This function is safe for concurrent access. 508 func (idx *TxIndex) Entry(hash *chainhash.Hash) (*TxIndexEntry, error) { 509 var entry *TxIndexEntry 510 err := idx.db.View(func(dbTx database.Tx) error { 511 var err error 512 entry, err = dbFetchTxIndexEntry(dbTx, hash) 513 return err 514 }) 515 return entry, err 516 } 517 518 // NewTxIndex returns a new instance of an indexer that is used to create a 519 // mapping of the hashes of all transactions in the blockchain to the respective 520 // block, location within the block, and size of the transaction. 521 // 522 // It implements the Indexer interface which plugs into the IndexManager that in 523 // turn is used by the blockchain package. This allows the index to be 524 // seamlessly maintained along with the chain. 525 func NewTxIndex(db database.DB) *TxIndex { 526 return &TxIndex{db: db} 527 } 528 529 // dropBlockIDIndex drops the internal block id index. 530 func dropBlockIDIndex(db database.DB) error { 531 return db.Update(func(dbTx database.Tx) error { 532 meta := dbTx.Metadata() 533 err := meta.DeleteBucket(idByHashIndexBucketName) 534 if err != nil { 535 return err 536 } 537 538 return meta.DeleteBucket(hashByIDIndexBucketName) 539 }) 540 } 541 542 // DropTxIndex drops the transaction index from the provided database if it 543 // exists. Since the address index relies on it, the address index will also be 544 // dropped when it exists. 545 func DropTxIndex(db database.DB, interrupt <-chan struct{}) error { 546 // Nothing to do if the index doesn't already exist. 547 exists, err := existsIndex(db, txIndexKey, txIndexName) 548 if err != nil { 549 return err 550 } 551 if !exists { 552 log.Infof("Not dropping %s because it does not exist", txIndexName) 553 return nil 554 } 555 556 // Mark that the index is in the process of being dropped so that it 557 // can be resumed on the next start if interrupted before the process is 558 // complete. 559 err = markIndexDeletion(db, txIndexKey) 560 if err != nil { 561 return err 562 } 563 564 // Drop the address index if it exists, as it depends on the transaction 565 // index. 566 err = DropAddrIndex(db, interrupt) 567 if err != nil { 568 return err 569 } 570 571 log.Infof("Dropping all %s entries. This might take a while...", 572 txIndexName) 573 574 // Since the indexes can be so large, attempting to simply delete 575 // the bucket in a single database transaction would result in massive 576 // memory usage and likely crash many systems due to ulimits. In order 577 // to avoid this, use a cursor to delete a maximum number of entries out 578 // of the bucket at a time. 579 err = incrementalFlatDrop(db, txIndexKey, txIndexName, interrupt) 580 if err != nil { 581 return err 582 } 583 584 // Call extra index specific deinitialization for the transaction index. 585 err = dropBlockIDIndex(db) 586 if err != nil { 587 return err 588 } 589 590 // Remove the index tip, version, bucket, and in-progress drop flag now 591 // that all index entries have been removed. 592 err = dropIndexMetadata(db, txIndexKey, txIndexName) 593 if err != nil { 594 return err 595 } 596 597 log.Infof("Dropped %s", txIndexName) 598 return nil 599 } 600 601 // DropIndex drops the transaction index from the provided database if it 602 // exists. Since the address index relies on it, the address index will also be 603 // dropped when it exists. 604 func (*TxIndex) DropIndex(db database.DB, interrupt <-chan struct{}) error { 605 return DropTxIndex(db, interrupt) 606 }