github.com/stellar/stellar-etl@v1.0.1-0.20240312145900-4874b6bf2b89/internal/input/changes.go (about)

     1  package input
     2  
import (
	"context"
	"fmt"
	"io"
	"math"
	"time"

	"github.com/stellar/stellar-etl/internal/utils"

	"github.com/stellar/go/ingest"
	"github.com/stellar/go/ingest/ledgerbackend"
	"github.com/stellar/go/xdr"
)
    15  
    16  var (
    17  	ExtractBatch = extractBatch
    18  )
    19  
    20  type LedgerChanges struct {
    21  	Changes       []ingest.Change
    22  	LedgerHeaders []xdr.LedgerHeaderHistoryEntry
    23  }
    24  
    25  // ChangeBatch represents the changes in a batch of ledgers represented by the range [BatchStart, BatchEnd)
    26  type ChangeBatch struct {
    27  	Changes    map[xdr.LedgerEntryType]LedgerChanges
    28  	BatchStart uint32
    29  	BatchEnd   uint32
    30  }
    31  
    32  // PrepareCaptiveCore creates a new captive core instance and prepares it with the given range. The range is unbounded when end = 0, and is bounded and validated otherwise
    33  func PrepareCaptiveCore(execPath string, tomlPath string, start, end uint32, env utils.EnvironmentDetails) (*ledgerbackend.CaptiveStellarCore, error) {
    34  	toml, err := ledgerbackend.NewCaptiveCoreTomlFromFile(
    35  		tomlPath,
    36  		ledgerbackend.CaptiveCoreTomlParams{
    37  			NetworkPassphrase:  env.NetworkPassphrase,
    38  			HistoryArchiveURLs: env.ArchiveURLs,
    39  			Strict:             true,
    40  			UseDB:              false,
    41  		},
    42  	)
    43  	if err != nil {
    44  		return &ledgerbackend.CaptiveStellarCore{}, err
    45  	}
    46  
    47  	captiveBackend, err := ledgerbackend.NewCaptive(
    48  		ledgerbackend.CaptiveCoreConfig{
    49  			BinaryPath:         execPath,
    50  			Toml:               toml,
    51  			NetworkPassphrase:  env.NetworkPassphrase,
    52  			HistoryArchiveURLs: env.ArchiveURLs,
    53  			UseDB:              false,
    54  			UserAgent:          "stellar-etl/1.0.0",
    55  		},
    56  	)
    57  	if err != nil {
    58  		return &ledgerbackend.CaptiveStellarCore{}, err
    59  	}
    60  
    61  	ledgerRange := ledgerbackend.UnboundedRange(start)
    62  
    63  	if end != 0 {
    64  		ledgerRange = ledgerbackend.BoundedRange(start, end)
    65  		latest, err := utils.GetLatestLedgerSequence(env.ArchiveURLs)
    66  		if err != nil {
    67  			return &ledgerbackend.CaptiveStellarCore{}, err
    68  		}
    69  
    70  		if err = utils.ValidateLedgerRange(start, end, latest); err != nil {
    71  			return &ledgerbackend.CaptiveStellarCore{}, err
    72  		}
    73  	}
    74  
    75  	ctx := context.Background()
    76  	err = captiveBackend.PrepareRange(ctx, ledgerRange)
    77  	if err != nil {
    78  		return &ledgerbackend.CaptiveStellarCore{}, err
    79  	}
    80  
    81  	return captiveBackend, nil
    82  }
    83  
    84  // extractBatch gets the changes from the ledgers in the range [batchStart, batchEnd] and compacts them
    85  func extractBatch(
    86  	batchStart, batchEnd uint32,
    87  	core *ledgerbackend.CaptiveStellarCore,
    88  	env utils.EnvironmentDetails, logger *utils.EtlLogger) ChangeBatch {
    89  
    90  	dataTypes := []xdr.LedgerEntryType{
    91  		xdr.LedgerEntryTypeAccount,
    92  		xdr.LedgerEntryTypeOffer,
    93  		xdr.LedgerEntryTypeTrustline,
    94  		xdr.LedgerEntryTypeLiquidityPool,
    95  		xdr.LedgerEntryTypeClaimableBalance,
    96  		xdr.LedgerEntryTypeContractData,
    97  		xdr.LedgerEntryTypeContractCode,
    98  		xdr.LedgerEntryTypeConfigSetting,
    99  		xdr.LedgerEntryTypeTtl}
   100  
   101  	ledgerChanges := map[xdr.LedgerEntryType]LedgerChanges{}
   102  	ctx := context.Background()
   103  	for seq := batchStart; seq <= batchEnd; {
   104  		changeCompactors := map[xdr.LedgerEntryType]*ingest.ChangeCompactor{}
   105  		for _, dt := range dataTypes {
   106  			changeCompactors[dt] = ingest.NewChangeCompactor()
   107  		}
   108  
   109  		latestLedger, err := core.GetLatestLedgerSequence(ctx)
   110  		if err != nil {
   111  			logger.Fatal("unable to get the latest ledger sequence: ", err)
   112  		}
   113  
   114  		// if this ledger is available, we process its changes and move on to the next ledger by incrementing seq.
   115  		// Otherwise, nothing is incremented, and we try again on the next iteration of the loop
   116  		var header xdr.LedgerHeaderHistoryEntry
   117  		if seq <= latestLedger {
   118  			changeReader, err := ingest.NewLedgerChangeReader(ctx, core, env.NetworkPassphrase, seq)
   119  			if err != nil {
   120  				logger.Fatal(fmt.Sprintf("unable to create change reader for ledger %d: ", seq), err)
   121  			}
   122  			header = changeReader.LedgerTransactionReader.GetHeader()
   123  
   124  			for {
   125  				change, err := changeReader.Read()
   126  				if err == io.EOF {
   127  					break
   128  				}
   129  				if err != nil {
   130  					logger.Fatal(fmt.Sprintf("unable to read changes from ledger %d: ", seq), err)
   131  				}
   132  				cache, ok := changeCompactors[change.Type]
   133  				if !ok {
   134  					// TODO: once LedgerEntryTypeData is tracked as well, all types should be addressed,
   135  					// so this info log should be a warning.
   136  					logger.Infof("change type: %v not tracked", change.Type)
   137  				} else {
   138  					cache.AddChange(change)
   139  				}
   140  			}
   141  
   142  			changeReader.Close()
   143  			seq++
   144  		}
   145  
   146  		for dataType, compactor := range changeCompactors {
   147  			for _, change := range compactor.GetChanges() {
   148  				dataTypeChanges := ledgerChanges[dataType]
   149  				dataTypeChanges.Changes = append(dataTypeChanges.Changes, change)
   150  				dataTypeChanges.LedgerHeaders = append(dataTypeChanges.LedgerHeaders, header)
   151  				ledgerChanges[dataType] = dataTypeChanges
   152  			}
   153  		}
   154  
   155  	}
   156  
   157  	return ChangeBatch{
   158  		Changes:    ledgerChanges,
   159  		BatchStart: batchStart,
   160  		BatchEnd:   batchEnd,
   161  	}
   162  }
   163  
   164  // StreamChanges reads in ledgers, processes the changes, and send the changes to the channel matching their type
   165  // Ledgers are processed in batches of size <batchSize>.
   166  func StreamChanges(core *ledgerbackend.CaptiveStellarCore, start, end, batchSize uint32, changeChannel chan ChangeBatch, closeChan chan int, env utils.EnvironmentDetails, logger *utils.EtlLogger) {
   167  	batchStart := start
   168  	batchEnd := uint32(math.Min(float64(batchStart+batchSize), float64(end)))
   169  	for batchStart < batchEnd {
   170  		if batchEnd < end {
   171  			batchEnd = uint32(batchEnd - 1)
   172  		}
   173  		batch := ExtractBatch(batchStart, batchEnd, core, env, logger)
   174  		changeChannel <- batch
   175  		// batchStart and batchEnd should not overlap
   176  		// overlapping batches causes duplicate record loads
   177  		batchStart = uint32(math.Min(float64(batchEnd), float64(end)) + 1)
   178  		batchEnd = uint32(math.Min(float64(batchStart+batchSize), float64(end)))
   179  	}
   180  	close(changeChannel)
   181  	closeChan <- 1
   182  }