github.com/stellar/stellar-etl@v1.0.1-0.20240312145900-4874b6bf2b89/internal/input/changes.go (about) 1 package input 2 3 import ( 4 "context" 5 "fmt" 6 "io" 7 "math" 8 9 "github.com/stellar/stellar-etl/internal/utils" 10 11 "github.com/stellar/go/ingest" 12 "github.com/stellar/go/ingest/ledgerbackend" 13 "github.com/stellar/go/xdr" 14 ) 15 16 var ( 17 ExtractBatch = extractBatch 18 ) 19 20 type LedgerChanges struct { 21 Changes []ingest.Change 22 LedgerHeaders []xdr.LedgerHeaderHistoryEntry 23 } 24 25 // ChangeBatch represents the changes in a batch of ledgers represented by the range [BatchStart, BatchEnd) 26 type ChangeBatch struct { 27 Changes map[xdr.LedgerEntryType]LedgerChanges 28 BatchStart uint32 29 BatchEnd uint32 30 } 31 32 // PrepareCaptiveCore creates a new captive core instance and prepares it with the given range. The range is unbounded when end = 0, and is bounded and validated otherwise 33 func PrepareCaptiveCore(execPath string, tomlPath string, start, end uint32, env utils.EnvironmentDetails) (*ledgerbackend.CaptiveStellarCore, error) { 34 toml, err := ledgerbackend.NewCaptiveCoreTomlFromFile( 35 tomlPath, 36 ledgerbackend.CaptiveCoreTomlParams{ 37 NetworkPassphrase: env.NetworkPassphrase, 38 HistoryArchiveURLs: env.ArchiveURLs, 39 Strict: true, 40 UseDB: false, 41 }, 42 ) 43 if err != nil { 44 return &ledgerbackend.CaptiveStellarCore{}, err 45 } 46 47 captiveBackend, err := ledgerbackend.NewCaptive( 48 ledgerbackend.CaptiveCoreConfig{ 49 BinaryPath: execPath, 50 Toml: toml, 51 NetworkPassphrase: env.NetworkPassphrase, 52 HistoryArchiveURLs: env.ArchiveURLs, 53 UseDB: false, 54 UserAgent: "stellar-etl/1.0.0", 55 }, 56 ) 57 if err != nil { 58 return &ledgerbackend.CaptiveStellarCore{}, err 59 } 60 61 ledgerRange := ledgerbackend.UnboundedRange(start) 62 63 if end != 0 { 64 ledgerRange = ledgerbackend.BoundedRange(start, end) 65 latest, err := utils.GetLatestLedgerSequence(env.ArchiveURLs) 66 if err != nil { 67 return &ledgerbackend.CaptiveStellarCore{}, err 68 } 69 70 if err = utils.ValidateLedgerRange(start, end, latest); err != nil { 71 return &ledgerbackend.CaptiveStellarCore{}, err 72 } 73 } 74 75 ctx := context.Background() 76 err = captiveBackend.PrepareRange(ctx, ledgerRange) 77 if err != nil { 78 return &ledgerbackend.CaptiveStellarCore{}, err 79 } 80 81 return captiveBackend, nil 82 } 83 84 // extractBatch gets the changes from the ledgers in the range [batchStart, batchEnd] and compacts them 85 func extractBatch( 86 batchStart, batchEnd uint32, 87 core *ledgerbackend.CaptiveStellarCore, 88 env utils.EnvironmentDetails, logger *utils.EtlLogger) ChangeBatch { 89 90 dataTypes := []xdr.LedgerEntryType{ 91 xdr.LedgerEntryTypeAccount, 92 xdr.LedgerEntryTypeOffer, 93 xdr.LedgerEntryTypeTrustline, 94 xdr.LedgerEntryTypeLiquidityPool, 95 xdr.LedgerEntryTypeClaimableBalance, 96 xdr.LedgerEntryTypeContractData, 97 xdr.LedgerEntryTypeContractCode, 98 xdr.LedgerEntryTypeConfigSetting, 99 xdr.LedgerEntryTypeTtl} 100 101 ledgerChanges := map[xdr.LedgerEntryType]LedgerChanges{} 102 ctx := context.Background() 103 for seq := batchStart; seq <= batchEnd; { 104 changeCompactors := map[xdr.LedgerEntryType]*ingest.ChangeCompactor{} 105 for _, dt := range dataTypes { 106 changeCompactors[dt] = ingest.NewChangeCompactor() 107 } 108 109 latestLedger, err := core.GetLatestLedgerSequence(ctx) 110 if err != nil { 111 logger.Fatal("unable to get the latest ledger sequence: ", err) 112 } 113 114 // if this ledger is available, we process its changes and move on to the next ledger by incrementing seq. 115 // Otherwise, nothing is incremented, and we try again on the next iteration of the loop 116 var header xdr.LedgerHeaderHistoryEntry 117 if seq <= latestLedger { 118 changeReader, err := ingest.NewLedgerChangeReader(ctx, core, env.NetworkPassphrase, seq) 119 if err != nil { 120 logger.Fatal(fmt.Sprintf("unable to create change reader for ledger %d: ", seq), err) 121 } 122 header = changeReader.LedgerTransactionReader.GetHeader() 123 124 for { 125 change, err := changeReader.Read() 126 if err == io.EOF { 127 break 128 } 129 if err != nil { 130 logger.Fatal(fmt.Sprintf("unable to read changes from ledger %d: ", seq), err) 131 } 132 cache, ok := changeCompactors[change.Type] 133 if !ok { 134 // TODO: once LedgerEntryTypeData is tracked as well, all types should be addressed, 135 // so this info log should be a warning. 136 logger.Infof("change type: %v not tracked", change.Type) 137 } else { 138 cache.AddChange(change) 139 } 140 } 141 142 changeReader.Close() 143 seq++ 144 } 145 146 for dataType, compactor := range changeCompactors { 147 for _, change := range compactor.GetChanges() { 148 dataTypeChanges := ledgerChanges[dataType] 149 dataTypeChanges.Changes = append(dataTypeChanges.Changes, change) 150 dataTypeChanges.LedgerHeaders = append(dataTypeChanges.LedgerHeaders, header) 151 ledgerChanges[dataType] = dataTypeChanges 152 } 153 } 154 155 } 156 157 return ChangeBatch{ 158 Changes: ledgerChanges, 159 BatchStart: batchStart, 160 BatchEnd: batchEnd, 161 } 162 } 163 164 // StreamChanges reads in ledgers, processes the changes, and send the changes to the channel matching their type 165 // Ledgers are processed in batches of size <batchSize>. 166 func StreamChanges(core *ledgerbackend.CaptiveStellarCore, start, end, batchSize uint32, changeChannel chan ChangeBatch, closeChan chan int, env utils.EnvironmentDetails, logger *utils.EtlLogger) { 167 batchStart := start 168 batchEnd := uint32(math.Min(float64(batchStart+batchSize), float64(end))) 169 for batchStart < batchEnd { 170 if batchEnd < end { 171 batchEnd = uint32(batchEnd - 1) 172 } 173 batch := ExtractBatch(batchStart, batchEnd, core, env, logger) 174 changeChannel <- batch 175 // batchStart and batchEnd should not overlap 176 // overlapping batches causes duplicate record loads 177 batchStart = uint32(math.Min(float64(batchEnd), float64(end)) + 1) 178 batchEnd = uint32(math.Min(float64(batchStart+batchSize), float64(end))) 179 } 180 close(changeChannel) 181 closeChan <- 1 182 }