github.com/lbryio/lbcd@v0.22.119/blockchain/indexers/txindex.go (about)

     1  // Copyright (c) 2016 The btcsuite developers
     2  // Use of this source code is governed by an ISC
     3  // license that can be found in the LICENSE file.
     4  
     5  package indexers
     6  
     7  import (
     8  	"errors"
     9  	"fmt"
    10  
    11  	"github.com/lbryio/lbcd/blockchain"
    12  	"github.com/lbryio/lbcd/chaincfg/chainhash"
    13  	"github.com/lbryio/lbcd/database"
    14  	"github.com/lbryio/lbcd/wire"
    15  	btcutil "github.com/lbryio/lbcutil"
    16  )
    17  
const (
	// txIndexName is the human-readable name for the index.  It is the
	// value returned by TxIndex.Name and the name DropTxIndex hands to
	// dropIndex when removing the index.
	txIndexName = "transaction index"
)
    22  
var (
	// txIndexKey is the key of the transaction index and the db bucket used
	// to house it.
	txIndexKey = []byte("txbyhashidx")

	// idByHashIndexBucketName is the name of the db bucket used to house
	// the block hash -> block id index (keys are block hashes, values are
	// serialized block ids).
	idByHashIndexBucketName = []byte("idbyhashidx")

	// hashByIDIndexBucketName is the name of the db bucket used to house
	// the block id -> block hash index (keys are serialized block ids,
	// values are block hashes).
	hashByIDIndexBucketName = []byte("hashbyididx")

	// errNoBlockIDEntry is an error that indicates a requested entry does
	// not exist in the block ID index.
	errNoBlockIDEntry = errors.New("no entry in the block ID index")
)
    40  
    41  // -----------------------------------------------------------------------------
// The transaction index consists of an entry for every transaction in the main
// chain.  In order to significantly optimize the space requirements, a separate
// index is maintained which provides an internal mapping between each block
// that has been indexed and a unique ID for use within the hash to location
// mappings.  The ID is simply a sequentially incremented uint32.  This is
// useful because it is only 4 bytes versus 32 bytes hashes and thus saves a ton
// of space in the index.
    49  //
    50  // There are three buckets used in total.  The first bucket maps the hash of
    51  // each transaction to the specific block location.  The second bucket maps the
    52  // hash of each block to the unique ID and the third maps that ID back to the
    53  // block hash.
    54  //
    55  // NOTE: Although it is technically possible for multiple transactions to have
    56  // the same hash as long as the previous transaction with the same hash is fully
    57  // spent, this code only stores the most recent one because doing otherwise
    58  // would add a non-trivial amount of space and overhead for something that will
    59  // realistically never happen per the probability and even if it did, the old
    60  // one must be fully spent and so the most likely transaction a caller would
    61  // want for a given hash is the most recent one anyways.
    62  //
    63  // The serialized format for keys and values in the block hash to ID bucket is:
    64  //   <hash> = <ID>
    65  //
    66  //   Field           Type              Size
    67  //   hash            chainhash.Hash    32 bytes
    68  //   ID              uint32            4 bytes
    69  //   -----
    70  //   Total: 36 bytes
    71  //
    72  // The serialized format for keys and values in the ID to block hash bucket is:
    73  //   <ID> = <hash>
    74  //
    75  //   Field           Type              Size
    76  //   ID              uint32            4 bytes
    77  //   hash            chainhash.Hash    32 bytes
    78  //   -----
    79  //   Total: 36 bytes
    80  //
    81  // The serialized format for the keys and values in the tx index bucket is:
    82  //
    83  //   <txhash> = <block id><start offset><tx length>
    84  //
    85  //   Field           Type              Size
    86  //   txhash          chainhash.Hash    32 bytes
    87  //   block id        uint32            4 bytes
    88  //   start offset    uint32          4 bytes
    89  //   tx length       uint32          4 bytes
    90  //   -----
    91  //   Total: 44 bytes
    92  // -----------------------------------------------------------------------------
    93  
    94  // dbPutBlockIDIndexEntry uses an existing database transaction to update or add
    95  // the index entries for the hash to id and id to hash mappings for the provided
    96  // values.
    97  func dbPutBlockIDIndexEntry(dbTx database.Tx, hash *chainhash.Hash, id uint32) error {
    98  	// Serialize the height for use in the index entries.
    99  	var serializedID [4]byte
   100  	byteOrder.PutUint32(serializedID[:], id)
   101  
   102  	// Add the block hash to ID mapping to the index.
   103  	meta := dbTx.Metadata()
   104  	hashIndex := meta.Bucket(idByHashIndexBucketName)
   105  	if err := hashIndex.Put(hash[:], serializedID[:]); err != nil {
   106  		return err
   107  	}
   108  
   109  	// Add the block ID to hash mapping to the index.
   110  	idIndex := meta.Bucket(hashByIDIndexBucketName)
   111  	return idIndex.Put(serializedID[:], hash[:])
   112  }
   113  
   114  // dbRemoveBlockIDIndexEntry uses an existing database transaction remove index
   115  // entries from the hash to id and id to hash mappings for the provided hash.
   116  func dbRemoveBlockIDIndexEntry(dbTx database.Tx, hash *chainhash.Hash) error {
   117  	// Remove the block hash to ID mapping.
   118  	meta := dbTx.Metadata()
   119  	hashIndex := meta.Bucket(idByHashIndexBucketName)
   120  	serializedID := hashIndex.Get(hash[:])
   121  	if serializedID == nil {
   122  		return nil
   123  	}
   124  	if err := hashIndex.Delete(hash[:]); err != nil {
   125  		return err
   126  	}
   127  
   128  	// Remove the block ID to hash mapping.
   129  	idIndex := meta.Bucket(hashByIDIndexBucketName)
   130  	return idIndex.Delete(serializedID)
   131  }
   132  
   133  // dbFetchBlockIDByHash uses an existing database transaction to retrieve the
   134  // block id for the provided hash from the index.
   135  func dbFetchBlockIDByHash(dbTx database.Tx, hash *chainhash.Hash) (uint32, error) {
   136  	hashIndex := dbTx.Metadata().Bucket(idByHashIndexBucketName)
   137  	serializedID := hashIndex.Get(hash[:])
   138  	if serializedID == nil {
   139  		return 0, errNoBlockIDEntry
   140  	}
   141  
   142  	return byteOrder.Uint32(serializedID), nil
   143  }
   144  
   145  // dbFetchBlockHashBySerializedID uses an existing database transaction to
   146  // retrieve the hash for the provided serialized block id from the index.
   147  func dbFetchBlockHashBySerializedID(dbTx database.Tx, serializedID []byte) (*chainhash.Hash, error) {
   148  	idIndex := dbTx.Metadata().Bucket(hashByIDIndexBucketName)
   149  	hashBytes := idIndex.Get(serializedID)
   150  	if hashBytes == nil {
   151  		return nil, errNoBlockIDEntry
   152  	}
   153  
   154  	var hash chainhash.Hash
   155  	copy(hash[:], hashBytes)
   156  	return &hash, nil
   157  }
   158  
   159  // dbFetchBlockHashByID uses an existing database transaction to retrieve the
   160  // hash for the provided block id from the index.
   161  func dbFetchBlockHashByID(dbTx database.Tx, id uint32) (*chainhash.Hash, error) {
   162  	var serializedID [4]byte
   163  	byteOrder.PutUint32(serializedID[:], id)
   164  	return dbFetchBlockHashBySerializedID(dbTx, serializedID[:])
   165  }
   166  
   167  // putTxIndexEntry serializes the provided values according to the format
   168  // described about for a transaction index entry.  The target byte slice must
   169  // be at least large enough to handle the number of bytes defined by the
   170  // txEntrySize constant or it will panic.
   171  func putTxIndexEntry(target []byte, blockID uint32, txLoc wire.TxLoc) {
   172  	byteOrder.PutUint32(target, blockID)
   173  	byteOrder.PutUint32(target[4:], uint32(txLoc.TxStart))
   174  	byteOrder.PutUint32(target[8:], uint32(txLoc.TxLen))
   175  }
   176  
   177  // dbPutTxIndexEntry uses an existing database transaction to update the
   178  // transaction index given the provided serialized data that is expected to have
   179  // been serialized putTxIndexEntry.
   180  func dbPutTxIndexEntry(dbTx database.Tx, txHash *chainhash.Hash, serializedData []byte) error {
   181  	txIndex := dbTx.Metadata().Bucket(txIndexKey)
   182  	return txIndex.Put(txHash[:], serializedData)
   183  }
   184  
   185  // dbFetchTxIndexEntry uses an existing database transaction to fetch the block
   186  // region for the provided transaction hash from the transaction index.  When
   187  // there is no entry for the provided hash, nil will be returned for the both
   188  // the region and the error.
   189  func dbFetchTxIndexEntry(dbTx database.Tx, txHash *chainhash.Hash) (*database.BlockRegion, error) {
   190  	// Load the record from the database and return now if it doesn't exist.
   191  	txIndex := dbTx.Metadata().Bucket(txIndexKey)
   192  	serializedData := txIndex.Get(txHash[:])
   193  	if len(serializedData) == 0 {
   194  		return nil, nil
   195  	}
   196  
   197  	// Ensure the serialized data has enough bytes to properly deserialize.
   198  	if len(serializedData) < 12 {
   199  		return nil, database.Error{
   200  			ErrorCode: database.ErrCorruption,
   201  			Description: fmt.Sprintf("corrupt transaction index "+
   202  				"entry for %s", txHash),
   203  		}
   204  	}
   205  
   206  	// Load the block hash associated with the block ID.
   207  	hash, err := dbFetchBlockHashBySerializedID(dbTx, serializedData[0:4])
   208  	if err != nil {
   209  		return nil, database.Error{
   210  			ErrorCode: database.ErrCorruption,
   211  			Description: fmt.Sprintf("corrupt transaction index "+
   212  				"entry for %s: %v", txHash, err),
   213  		}
   214  	}
   215  
   216  	// Deserialize the final entry.
   217  	region := database.BlockRegion{Hash: &chainhash.Hash{}}
   218  	copy(region.Hash[:], hash[:])
   219  	region.Offset = byteOrder.Uint32(serializedData[4:8])
   220  	region.Len = byteOrder.Uint32(serializedData[8:12])
   221  
   222  	return &region, nil
   223  }
   224  
   225  // dbAddTxIndexEntries uses an existing database transaction to add a
   226  // transaction index entry for every transaction in the passed block.
   227  func dbAddTxIndexEntries(dbTx database.Tx, block *btcutil.Block, blockID uint32) error {
   228  	// The offset and length of the transactions within the serialized
   229  	// block.
   230  	txLocs, err := block.TxLoc()
   231  	if err != nil {
   232  		return err
   233  	}
   234  
   235  	// As an optimization, allocate a single slice big enough to hold all
   236  	// of the serialized transaction index entries for the block and
   237  	// serialize them directly into the slice.  Then, pass the appropriate
   238  	// subslice to the database to be written.  This approach significantly
   239  	// cuts down on the number of required allocations.
   240  	offset := 0
   241  	serializedValues := make([]byte, len(block.Transactions())*txEntrySize)
   242  	for i, tx := range block.Transactions() {
   243  		putTxIndexEntry(serializedValues[offset:], blockID, txLocs[i])
   244  		endOffset := offset + txEntrySize
   245  		err := dbPutTxIndexEntry(dbTx, tx.Hash(),
   246  			serializedValues[offset:endOffset:endOffset])
   247  		if err != nil {
   248  			return err
   249  		}
   250  		offset += txEntrySize
   251  	}
   252  
   253  	return nil
   254  }
   255  
   256  // dbRemoveTxIndexEntry uses an existing database transaction to remove the most
   257  // recent transaction index entry for the given hash.
   258  func dbRemoveTxIndexEntry(dbTx database.Tx, txHash *chainhash.Hash) error {
   259  	txIndex := dbTx.Metadata().Bucket(txIndexKey)
   260  	serializedData := txIndex.Get(txHash[:])
   261  	if len(serializedData) == 0 {
   262  		return fmt.Errorf("can't remove non-existent transaction %s "+
   263  			"from the transaction index", txHash)
   264  	}
   265  
   266  	return txIndex.Delete(txHash[:])
   267  }
   268  
   269  // dbRemoveTxIndexEntries uses an existing database transaction to remove the
   270  // latest transaction entry for every transaction in the passed block.
   271  func dbRemoveTxIndexEntries(dbTx database.Tx, block *btcutil.Block) error {
   272  	for _, tx := range block.Transactions() {
   273  		err := dbRemoveTxIndexEntry(dbTx, tx.Hash())
   274  		if err != nil {
   275  			return err
   276  		}
   277  	}
   278  
   279  	return nil
   280  }
   281  
// TxIndex implements a transaction by hash index.  That is to say, it supports
// querying all transactions by their hash.
type TxIndex struct {
	// db is the database the index is read from in Init and TxBlockRegion.
	db         database.DB
	// curBlockID is the highest internal block ID currently in use.  It is
	// discovered by Init, advanced by ConnectBlock, and decremented by
	// DisconnectBlock.
	curBlockID uint32
}

// Ensure the TxIndex type implements the Indexer interface.
var _ Indexer = (*TxIndex)(nil)
   291  
   292  // Init initializes the hash-based transaction index.  In particular, it finds
   293  // the highest used block ID and stores it for later use when connecting or
   294  // disconnecting blocks.
   295  //
   296  // This is part of the Indexer interface.
   297  func (idx *TxIndex) Init() error {
   298  	// Find the latest known block id field for the internal block id
   299  	// index and initialize it.  This is done because it's a lot more
   300  	// efficient to do a single search at initialize time than it is to
   301  	// write another value to the database on every update.
   302  	err := idx.db.View(func(dbTx database.Tx) error {
   303  		// Scan forward in large gaps to find a block id that doesn't
   304  		// exist yet to serve as an upper bound for the binary search
   305  		// below.
   306  		var highestKnown, nextUnknown uint32
   307  		testBlockID := uint32(1)
   308  		increment := uint32(100000)
   309  		for {
   310  			_, err := dbFetchBlockHashByID(dbTx, testBlockID)
   311  			if err != nil {
   312  				nextUnknown = testBlockID
   313  				break
   314  			}
   315  
   316  			highestKnown = testBlockID
   317  			testBlockID += increment
   318  		}
   319  		log.Tracef("Forward scan (highest known %d, next unknown %d)",
   320  			highestKnown, nextUnknown)
   321  
   322  		// No used block IDs due to new database.
   323  		if nextUnknown == 1 {
   324  			return nil
   325  		}
   326  
   327  		// Use a binary search to find the final highest used block id.
   328  		// This will take at most ceil(log_2(increment)) attempts.
   329  		for {
   330  			testBlockID = (highestKnown + nextUnknown) / 2
   331  			_, err := dbFetchBlockHashByID(dbTx, testBlockID)
   332  			if err != nil {
   333  				nextUnknown = testBlockID
   334  			} else {
   335  				highestKnown = testBlockID
   336  			}
   337  			log.Tracef("Binary scan (highest known %d, next "+
   338  				"unknown %d)", highestKnown, nextUnknown)
   339  			if highestKnown+1 == nextUnknown {
   340  				break
   341  			}
   342  		}
   343  
   344  		idx.curBlockID = highestKnown
   345  		return nil
   346  	})
   347  	if err != nil {
   348  		return err
   349  	}
   350  
   351  	log.Debugf("Current internal block ID: %d", idx.curBlockID)
   352  	return nil
   353  }
   354  
// Key returns the database key to use for the index as a byte slice.  The
// returned slice is the package-level txIndexKey itself rather than a copy, so
// callers must not modify it.
//
// This is part of the Indexer interface.
func (idx *TxIndex) Key() []byte {
	return txIndexKey
}
   361  
// Name returns the human-readable name of the index, which is the txIndexName
// constant.
//
// This is part of the Indexer interface.
func (idx *TxIndex) Name() string {
	return txIndexName
}
   368  
   369  // Create is invoked when the indexer manager determines the index needs
   370  // to be created for the first time.  It creates the buckets for the hash-based
   371  // transaction index and the internal block ID indexes.
   372  //
   373  // This is part of the Indexer interface.
   374  func (idx *TxIndex) Create(dbTx database.Tx) error {
   375  	meta := dbTx.Metadata()
   376  	if _, err := meta.CreateBucket(idByHashIndexBucketName); err != nil {
   377  		return err
   378  	}
   379  	if _, err := meta.CreateBucket(hashByIDIndexBucketName); err != nil {
   380  		return err
   381  	}
   382  	_, err := meta.CreateBucket(txIndexKey)
   383  	return err
   384  }
   385  
   386  // ConnectBlock is invoked by the index manager when a new block has been
   387  // connected to the main chain.  This indexer adds a hash-to-transaction mapping
   388  // for every transaction in the passed block.
   389  //
   390  // This is part of the Indexer interface.
   391  func (idx *TxIndex) ConnectBlock(dbTx database.Tx, block *btcutil.Block,
   392  	stxos []blockchain.SpentTxOut) error {
   393  
   394  	// Increment the internal block ID to use for the block being connected
   395  	// and add all of the transactions in the block to the index.
   396  	newBlockID := idx.curBlockID + 1
   397  	if err := dbAddTxIndexEntries(dbTx, block, newBlockID); err != nil {
   398  		return err
   399  	}
   400  
   401  	// Add the new block ID index entry for the block being connected and
   402  	// update the current internal block ID accordingly.
   403  	err := dbPutBlockIDIndexEntry(dbTx, block.Hash(), newBlockID)
   404  	if err != nil {
   405  		return err
   406  	}
   407  	idx.curBlockID = newBlockID
   408  	return nil
   409  }
   410  
   411  // DisconnectBlock is invoked by the index manager when a block has been
   412  // disconnected from the main chain.  This indexer removes the
   413  // hash-to-transaction mapping for every transaction in the block.
   414  //
   415  // This is part of the Indexer interface.
   416  func (idx *TxIndex) DisconnectBlock(dbTx database.Tx, block *btcutil.Block,
   417  	stxos []blockchain.SpentTxOut) error {
   418  
   419  	// Remove all of the transactions in the block from the index.
   420  	if err := dbRemoveTxIndexEntries(dbTx, block); err != nil {
   421  		return err
   422  	}
   423  
   424  	// Remove the block ID index entry for the block being disconnected and
   425  	// decrement the current internal block ID to account for it.
   426  	if err := dbRemoveBlockIDIndexEntry(dbTx, block.Hash()); err != nil {
   427  		return err
   428  	}
   429  	idx.curBlockID--
   430  	return nil
   431  }
   432  
   433  // TxBlockRegion returns the block region for the provided transaction hash
   434  // from the transaction index.  The block region can in turn be used to load the
   435  // raw transaction bytes.  When there is no entry for the provided hash, nil
   436  // will be returned for the both the entry and the error.
   437  //
   438  // This function is safe for concurrent access.
   439  func (idx *TxIndex) TxBlockRegion(hash *chainhash.Hash) (*database.BlockRegion, error) {
   440  	var region *database.BlockRegion
   441  	err := idx.db.View(func(dbTx database.Tx) error {
   442  		var err error
   443  		region, err = dbFetchTxIndexEntry(dbTx, hash)
   444  		return err
   445  	})
   446  	return region, err
   447  }
   448  
   449  // NewTxIndex returns a new instance of an indexer that is used to create a
   450  // mapping of the hashes of all transactions in the blockchain to the respective
   451  // block, location within the block, and size of the transaction.
   452  //
   453  // It implements the Indexer interface which plugs into the IndexManager that in
   454  // turn is used by the blockchain package.  This allows the index to be
   455  // seamlessly maintained along with the chain.
   456  func NewTxIndex(db database.DB) *TxIndex {
   457  	return &TxIndex{db: db}
   458  }
   459  
   460  // dropBlockIDIndex drops the internal block id index.
   461  func dropBlockIDIndex(db database.DB) error {
   462  	return db.Update(func(dbTx database.Tx) error {
   463  		meta := dbTx.Metadata()
   464  		err := meta.DeleteBucket(idByHashIndexBucketName)
   465  		if err != nil {
   466  			return err
   467  		}
   468  
   469  		return meta.DeleteBucket(hashByIDIndexBucketName)
   470  	})
   471  }
   472  
   473  // DropTxIndex drops the transaction index from the provided database if it
   474  // exists.  Since the address index relies on it, the address index will also be
   475  // dropped when it exists.
   476  func DropTxIndex(db database.DB, interrupt <-chan struct{}) error {
   477  	err := dropIndex(db, addrIndexKey, addrIndexName, interrupt)
   478  	if err != nil {
   479  		return err
   480  	}
   481  
   482  	return dropIndex(db, txIndexKey, txIndexName, interrupt)
   483  }