github.com/palcoin-project/palcd@v1.0.0/blockchain/indexers/txindex.go (about) 1 // Copyright (c) 2016 The btcsuite developers 2 // Use of this source code is governed by an ISC 3 // license that can be found in the LICENSE file. 4 5 package indexers 6 7 import ( 8 "errors" 9 "fmt" 10 11 "github.com/palcoin-project/palcd/blockchain" 12 "github.com/palcoin-project/palcd/chaincfg/chainhash" 13 "github.com/palcoin-project/palcd/database" 14 "github.com/palcoin-project/palcd/wire" 15 "github.com/palcoin-project/palcutil" 16 ) 17 18 const ( 19 // txIndexName is the human-readable name for the index. 20 txIndexName = "transaction index" 21 ) 22 23 var ( 24 // txIndexKey is the key of the transaction index and the db bucket used 25 // to house it. 26 txIndexKey = []byte("txbyhashidx") 27 28 // idByHashIndexBucketName is the name of the db bucket used to house 29 // the block id -> block hash index. 30 idByHashIndexBucketName = []byte("idbyhashidx") 31 32 // hashByIDIndexBucketName is the name of the db bucket used to house 33 // the block hash -> block id index. 34 hashByIDIndexBucketName = []byte("hashbyididx") 35 36 // errNoBlockIDEntry is an error that indicates a requested entry does 37 // not exist in the block ID index. 38 errNoBlockIDEntry = errors.New("no entry in the block ID index") 39 ) 40 41 // ----------------------------------------------------------------------------- 42 // The transaction index consists of an entry for every transaction in the main 43 // chain. In order to significantly optimize the space requirements a separate 44 // index which provides an internal mapping between each block that has been 45 // indexed and a unique ID for use within the hash to location mappings. The ID 46 // is simply a sequentially incremented uint32. This is useful because it is 47 // only 4 bytes versus 32 bytes hashes and thus saves a ton of space in the 48 // index. 49 // 50 // There are three buckets used in total. The first bucket maps the hash of 51 // each transaction to the specific block location. The second bucket maps the 52 // hash of each block to the unique ID and the third maps that ID back to the 53 // block hash. 54 // 55 // NOTE: Although it is technically possible for multiple transactions to have 56 // the same hash as long as the previous transaction with the same hash is fully 57 // spent, this code only stores the most recent one because doing otherwise 58 // would add a non-trivial amount of space and overhead for something that will 59 // realistically never happen per the probability and even if it did, the old 60 // one must be fully spent and so the most likely transaction a caller would 61 // want for a given hash is the most recent one anyways. 62 // 63 // The serialized format for keys and values in the block hash to ID bucket is: 64 // <hash> = <ID> 65 // 66 // Field Type Size 67 // hash chainhash.Hash 32 bytes 68 // ID uint32 4 bytes 69 // ----- 70 // Total: 36 bytes 71 // 72 // The serialized format for keys and values in the ID to block hash bucket is: 73 // <ID> = <hash> 74 // 75 // Field Type Size 76 // ID uint32 4 bytes 77 // hash chainhash.Hash 32 bytes 78 // ----- 79 // Total: 36 bytes 80 // 81 // The serialized format for the keys and values in the tx index bucket is: 82 // 83 // <txhash> = <block id><start offset><tx length> 84 // 85 // Field Type Size 86 // txhash chainhash.Hash 32 bytes 87 // block id uint32 4 bytes 88 // start offset uint32 4 bytes 89 // tx length uint32 4 bytes 90 // ----- 91 // Total: 44 bytes 92 // ----------------------------------------------------------------------------- 93 94 // dbPutBlockIDIndexEntry uses an existing database transaction to update or add 95 // the index entries for the hash to id and id to hash mappings for the provided 96 // values. 97 func dbPutBlockIDIndexEntry(dbTx database.Tx, hash *chainhash.Hash, id uint32) error { 98 // Serialize the height for use in the index entries. 99 var serializedID [4]byte 100 byteOrder.PutUint32(serializedID[:], id) 101 102 // Add the block hash to ID mapping to the index. 103 meta := dbTx.Metadata() 104 hashIndex := meta.Bucket(idByHashIndexBucketName) 105 if err := hashIndex.Put(hash[:], serializedID[:]); err != nil { 106 return err 107 } 108 109 // Add the block ID to hash mapping to the index. 110 idIndex := meta.Bucket(hashByIDIndexBucketName) 111 return idIndex.Put(serializedID[:], hash[:]) 112 } 113 114 // dbRemoveBlockIDIndexEntry uses an existing database transaction remove index 115 // entries from the hash to id and id to hash mappings for the provided hash. 116 func dbRemoveBlockIDIndexEntry(dbTx database.Tx, hash *chainhash.Hash) error { 117 // Remove the block hash to ID mapping. 118 meta := dbTx.Metadata() 119 hashIndex := meta.Bucket(idByHashIndexBucketName) 120 serializedID := hashIndex.Get(hash[:]) 121 if serializedID == nil { 122 return nil 123 } 124 if err := hashIndex.Delete(hash[:]); err != nil { 125 return err 126 } 127 128 // Remove the block ID to hash mapping. 129 idIndex := meta.Bucket(hashByIDIndexBucketName) 130 return idIndex.Delete(serializedID) 131 } 132 133 // dbFetchBlockIDByHash uses an existing database transaction to retrieve the 134 // block id for the provided hash from the index. 135 func dbFetchBlockIDByHash(dbTx database.Tx, hash *chainhash.Hash) (uint32, error) { 136 hashIndex := dbTx.Metadata().Bucket(idByHashIndexBucketName) 137 serializedID := hashIndex.Get(hash[:]) 138 if serializedID == nil { 139 return 0, errNoBlockIDEntry 140 } 141 142 return byteOrder.Uint32(serializedID), nil 143 } 144 145 // dbFetchBlockHashBySerializedID uses an existing database transaction to 146 // retrieve the hash for the provided serialized block id from the index. 147 func dbFetchBlockHashBySerializedID(dbTx database.Tx, serializedID []byte) (*chainhash.Hash, error) { 148 idIndex := dbTx.Metadata().Bucket(hashByIDIndexBucketName) 149 hashBytes := idIndex.Get(serializedID) 150 if hashBytes == nil { 151 return nil, errNoBlockIDEntry 152 } 153 154 var hash chainhash.Hash 155 copy(hash[:], hashBytes) 156 return &hash, nil 157 } 158 159 // dbFetchBlockHashByID uses an existing database transaction to retrieve the 160 // hash for the provided block id from the index. 161 func dbFetchBlockHashByID(dbTx database.Tx, id uint32) (*chainhash.Hash, error) { 162 var serializedID [4]byte 163 byteOrder.PutUint32(serializedID[:], id) 164 return dbFetchBlockHashBySerializedID(dbTx, serializedID[:]) 165 } 166 167 // putTxIndexEntry serializes the provided values according to the format 168 // described about for a transaction index entry. The target byte slice must 169 // be at least large enough to handle the number of bytes defined by the 170 // txEntrySize constant or it will panic. 171 func putTxIndexEntry(target []byte, blockID uint32, txLoc wire.TxLoc) { 172 byteOrder.PutUint32(target, blockID) 173 byteOrder.PutUint32(target[4:], uint32(txLoc.TxStart)) 174 byteOrder.PutUint32(target[8:], uint32(txLoc.TxLen)) 175 } 176 177 // dbPutTxIndexEntry uses an existing database transaction to update the 178 // transaction index given the provided serialized data that is expected to have 179 // been serialized putTxIndexEntry. 180 func dbPutTxIndexEntry(dbTx database.Tx, txHash *chainhash.Hash, serializedData []byte) error { 181 txIndex := dbTx.Metadata().Bucket(txIndexKey) 182 return txIndex.Put(txHash[:], serializedData) 183 } 184 185 // dbFetchTxIndexEntry uses an existing database transaction to fetch the block 186 // region for the provided transaction hash from the transaction index. When 187 // there is no entry for the provided hash, nil will be returned for the both 188 // the region and the error. 189 func dbFetchTxIndexEntry(dbTx database.Tx, txHash *chainhash.Hash) (*database.BlockRegion, error) { 190 // Load the record from the database and return now if it doesn't exist. 191 txIndex := dbTx.Metadata().Bucket(txIndexKey) 192 serializedData := txIndex.Get(txHash[:]) 193 if len(serializedData) == 0 { 194 return nil, nil 195 } 196 197 // Ensure the serialized data has enough bytes to properly deserialize. 198 if len(serializedData) < 12 { 199 return nil, database.Error{ 200 ErrorCode: database.ErrCorruption, 201 Description: fmt.Sprintf("corrupt transaction index "+ 202 "entry for %s", txHash), 203 } 204 } 205 206 // Load the block hash associated with the block ID. 207 hash, err := dbFetchBlockHashBySerializedID(dbTx, serializedData[0:4]) 208 if err != nil { 209 return nil, database.Error{ 210 ErrorCode: database.ErrCorruption, 211 Description: fmt.Sprintf("corrupt transaction index "+ 212 "entry for %s: %v", txHash, err), 213 } 214 } 215 216 // Deserialize the final entry. 217 region := database.BlockRegion{Hash: &chainhash.Hash{}} 218 copy(region.Hash[:], hash[:]) 219 region.Offset = byteOrder.Uint32(serializedData[4:8]) 220 region.Len = byteOrder.Uint32(serializedData[8:12]) 221 222 return ®ion, nil 223 } 224 225 // dbAddTxIndexEntries uses an existing database transaction to add a 226 // transaction index entry for every transaction in the passed block. 227 func dbAddTxIndexEntries(dbTx database.Tx, block *palcutil.Block, blockID uint32) error { 228 // The offset and length of the transactions within the serialized 229 // block. 230 txLocs, err := block.TxLoc() 231 if err != nil { 232 return err 233 } 234 235 // As an optimization, allocate a single slice big enough to hold all 236 // of the serialized transaction index entries for the block and 237 // serialize them directly into the slice. Then, pass the appropriate 238 // subslice to the database to be written. This approach significantly 239 // cuts down on the number of required allocations. 240 offset := 0 241 serializedValues := make([]byte, len(block.Transactions())*txEntrySize) 242 for i, tx := range block.Transactions() { 243 putTxIndexEntry(serializedValues[offset:], blockID, txLocs[i]) 244 endOffset := offset + txEntrySize 245 err := dbPutTxIndexEntry(dbTx, tx.Hash(), 246 serializedValues[offset:endOffset:endOffset]) 247 if err != nil { 248 return err 249 } 250 offset += txEntrySize 251 } 252 253 return nil 254 } 255 256 // dbRemoveTxIndexEntry uses an existing database transaction to remove the most 257 // recent transaction index entry for the given hash. 258 func dbRemoveTxIndexEntry(dbTx database.Tx, txHash *chainhash.Hash) error { 259 txIndex := dbTx.Metadata().Bucket(txIndexKey) 260 serializedData := txIndex.Get(txHash[:]) 261 if len(serializedData) == 0 { 262 return fmt.Errorf("can't remove non-existent transaction %s "+ 263 "from the transaction index", txHash) 264 } 265 266 return txIndex.Delete(txHash[:]) 267 } 268 269 // dbRemoveTxIndexEntries uses an existing database transaction to remove the 270 // latest transaction entry for every transaction in the passed block. 271 func dbRemoveTxIndexEntries(dbTx database.Tx, block *palcutil.Block) error { 272 for _, tx := range block.Transactions() { 273 err := dbRemoveTxIndexEntry(dbTx, tx.Hash()) 274 if err != nil { 275 return err 276 } 277 } 278 279 return nil 280 } 281 282 // TxIndex implements a transaction by hash index. That is to say, it supports 283 // querying all transactions by their hash. 284 type TxIndex struct { 285 db database.DB 286 curBlockID uint32 287 } 288 289 // Ensure the TxIndex type implements the Indexer interface. 290 var _ Indexer = (*TxIndex)(nil) 291 292 // Init initializes the hash-based transaction index. In particular, it finds 293 // the highest used block ID and stores it for later use when connecting or 294 // disconnecting blocks. 295 // 296 // This is part of the Indexer interface. 297 func (idx *TxIndex) Init() error { 298 // Find the latest known block id field for the internal block id 299 // index and initialize it. This is done because it's a lot more 300 // efficient to do a single search at initialize time than it is to 301 // write another value to the database on every update. 302 err := idx.db.View(func(dbTx database.Tx) error { 303 // Scan forward in large gaps to find a block id that doesn't 304 // exist yet to serve as an upper bound for the binary search 305 // below. 306 var highestKnown, nextUnknown uint32 307 testBlockID := uint32(1) 308 increment := uint32(100000) 309 for { 310 _, err := dbFetchBlockHashByID(dbTx, testBlockID) 311 if err != nil { 312 nextUnknown = testBlockID 313 break 314 } 315 316 highestKnown = testBlockID 317 testBlockID += increment 318 } 319 log.Tracef("Forward scan (highest known %d, next unknown %d)", 320 highestKnown, nextUnknown) 321 322 // No used block IDs due to new database. 323 if nextUnknown == 1 { 324 return nil 325 } 326 327 // Use a binary search to find the final highest used block id. 328 // This will take at most ceil(log_2(increment)) attempts. 329 for { 330 testBlockID = (highestKnown + nextUnknown) / 2 331 _, err := dbFetchBlockHashByID(dbTx, testBlockID) 332 if err != nil { 333 nextUnknown = testBlockID 334 } else { 335 highestKnown = testBlockID 336 } 337 log.Tracef("Binary scan (highest known %d, next "+ 338 "unknown %d)", highestKnown, nextUnknown) 339 if highestKnown+1 == nextUnknown { 340 break 341 } 342 } 343 344 idx.curBlockID = highestKnown 345 return nil 346 }) 347 if err != nil { 348 return err 349 } 350 351 log.Debugf("Current internal block ID: %d", idx.curBlockID) 352 return nil 353 } 354 355 // Key returns the database key to use for the index as a byte slice. 356 // 357 // This is part of the Indexer interface. 358 func (idx *TxIndex) Key() []byte { 359 return txIndexKey 360 } 361 362 // Name returns the human-readable name of the index. 363 // 364 // This is part of the Indexer interface. 365 func (idx *TxIndex) Name() string { 366 return txIndexName 367 } 368 369 // Create is invoked when the indexer manager determines the index needs 370 // to be created for the first time. It creates the buckets for the hash-based 371 // transaction index and the internal block ID indexes. 372 // 373 // This is part of the Indexer interface. 374 func (idx *TxIndex) Create(dbTx database.Tx) error { 375 meta := dbTx.Metadata() 376 if _, err := meta.CreateBucket(idByHashIndexBucketName); err != nil { 377 return err 378 } 379 if _, err := meta.CreateBucket(hashByIDIndexBucketName); err != nil { 380 return err 381 } 382 _, err := meta.CreateBucket(txIndexKey) 383 return err 384 } 385 386 // ConnectBlock is invoked by the index manager when a new block has been 387 // connected to the main chain. This indexer adds a hash-to-transaction mapping 388 // for every transaction in the passed block. 389 // 390 // This is part of the Indexer interface. 391 func (idx *TxIndex) ConnectBlock(dbTx database.Tx, block *palcutil.Block, 392 stxos []blockchain.SpentTxOut) error { 393 394 // Increment the internal block ID to use for the block being connected 395 // and add all of the transactions in the block to the index. 396 newBlockID := idx.curBlockID + 1 397 if err := dbAddTxIndexEntries(dbTx, block, newBlockID); err != nil { 398 return err 399 } 400 401 // Add the new block ID index entry for the block being connected and 402 // update the current internal block ID accordingly. 403 err := dbPutBlockIDIndexEntry(dbTx, block.Hash(), newBlockID) 404 if err != nil { 405 return err 406 } 407 idx.curBlockID = newBlockID 408 return nil 409 } 410 411 // DisconnectBlock is invoked by the index manager when a block has been 412 // disconnected from the main chain. This indexer removes the 413 // hash-to-transaction mapping for every transaction in the block. 414 // 415 // This is part of the Indexer interface. 416 func (idx *TxIndex) DisconnectBlock(dbTx database.Tx, block *palcutil.Block, 417 stxos []blockchain.SpentTxOut) error { 418 419 // Remove all of the transactions in the block from the index. 420 if err := dbRemoveTxIndexEntries(dbTx, block); err != nil { 421 return err 422 } 423 424 // Remove the block ID index entry for the block being disconnected and 425 // decrement the current internal block ID to account for it. 426 if err := dbRemoveBlockIDIndexEntry(dbTx, block.Hash()); err != nil { 427 return err 428 } 429 idx.curBlockID-- 430 return nil 431 } 432 433 // TxBlockRegion returns the block region for the provided transaction hash 434 // from the transaction index. The block region can in turn be used to load the 435 // raw transaction bytes. When there is no entry for the provided hash, nil 436 // will be returned for the both the entry and the error. 437 // 438 // This function is safe for concurrent access. 439 func (idx *TxIndex) TxBlockRegion(hash *chainhash.Hash) (*database.BlockRegion, error) { 440 var region *database.BlockRegion 441 err := idx.db.View(func(dbTx database.Tx) error { 442 var err error 443 region, err = dbFetchTxIndexEntry(dbTx, hash) 444 return err 445 }) 446 return region, err 447 } 448 449 // NewTxIndex returns a new instance of an indexer that is used to create a 450 // mapping of the hashes of all transactions in the blockchain to the respective 451 // block, location within the block, and size of the transaction. 452 // 453 // It implements the Indexer interface which plugs into the IndexManager that in 454 // turn is used by the blockchain package. This allows the index to be 455 // seamlessly maintained along with the chain. 456 func NewTxIndex(db database.DB) *TxIndex { 457 return &TxIndex{db: db} 458 } 459 460 // dropBlockIDIndex drops the internal block id index. 461 func dropBlockIDIndex(db database.DB) error { 462 return db.Update(func(dbTx database.Tx) error { 463 meta := dbTx.Metadata() 464 err := meta.DeleteBucket(idByHashIndexBucketName) 465 if err != nil { 466 return err 467 } 468 469 return meta.DeleteBucket(hashByIDIndexBucketName) 470 }) 471 } 472 473 // DropTxIndex drops the transaction index from the provided database if it 474 // exists. Since the address index relies on it, the address index will also be 475 // dropped when it exists. 476 func DropTxIndex(db database.DB, interrupt <-chan struct{}) error { 477 err := dropIndex(db, addrIndexKey, addrIndexName, interrupt) 478 if err != nil { 479 return err 480 } 481 482 return dropIndex(db, txIndexKey, txIndexName, interrupt) 483 }