github.com/TrueBlocks/trueblocks-core/src/apps/chifra@v0.0.0-20241022031540-b362680128f7/pkg/index/download_chunks.go (about) 1 // Copyright 2021 The TrueBlocks Authors. All rights reserved. 2 // Use of this source code is governed by a license that can 3 // be found in the LICENSE file. 4 5 package index 6 7 // Fetching, unzipping, validating and saving both index and bloom chunks 8 9 import ( 10 "context" 11 "errors" 12 "fmt" 13 "io" 14 "net/http" 15 "net/url" 16 "os" 17 "path" 18 "path/filepath" 19 "strconv" 20 "strings" 21 "sync" 22 23 "github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/colors" 24 "github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/config" 25 "github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/debug" 26 "github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/file" 27 "github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/logger" 28 "github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/progress" 29 "github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/sigintTrap" 30 "github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/types" 31 "github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/utils" 32 "github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/walk" 33 ants "github.com/panjf2000/ants/v2" 34 ) 35 36 // jobResult type is used to carry both downloaded data and some 37 // metadata to decompressing/file writing function through a channel 38 type jobResult struct { 39 rng string 40 fileSize int64 41 contents io.Reader 42 theChunk *types.ChunkRecord 43 } 44 45 type progressChan chan<- *progress.ProgressMsg 46 47 // Types of errors put into the progressChannel 48 49 var ErrFailedLocalFileRemoval = errors.New("failed to remove local file") 50 var ErrUserHitControlC = errors.New("user hit control + c") 51 var ErrDownloadError = errors.New("download error") 52 var ErrWriteToDiscError = errors.New("write to disc error") 53 54 // WorkerArguments are types meant to hold worker function arguments. We cannot 55 // pass the arguments directly, because a worker function is expected to take one 56 // parameter of type interface{}. 57 type downloadWorkerArguments struct { 58 ctx context.Context 59 progressChannel progressChan 60 gatewayUrl string 61 downloadWg *sync.WaitGroup 62 writeChannel chan *jobResult 63 nRetries int 64 } 65 66 type writeWorkerArguments struct { 67 ctx context.Context 68 progressChannel progressChan 69 cancel context.CancelFunc 70 writeWg *sync.WaitGroup 71 } 72 73 // worker function type as accepted by Ants 74 type workerFunction func(interface{}) 75 76 // getDownloadWorker returns a worker function that downloads a chunk 77 func getDownloadWorker(chain string, workerArgs downloadWorkerArguments, chunkType walk.CacheType) workerFunction { 78 progressChannel := workerArgs.progressChannel 79 80 return func(param interface{}) { 81 chunk := param.(types.ChunkRecord) 82 83 defer workerArgs.downloadWg.Done() 84 85 select { 86 case <-workerArgs.ctx.Done(): 87 // Cancel 88 return 89 90 default: 91 hash := chunk.BloomHash 92 if chunkType == walk.Index_Final { 93 hash = chunk.IndexHash 94 } 95 if hash != "" { 96 97 // TODO: Do we really need the colored display? 98 bHash := utils.FormattedHash(false, chunk.BloomHash.String()) 99 iHash := utils.FormattedHash(false, chunk.IndexHash.String()) 100 tHash := utils.FormattedHash(false, hash.String()) 101 msg := fmt.Sprintf("%s %s %s", chunk.Range, bHash, iHash) 102 msg = strings.Replace(msg, tHash, colors.BrightCyan+tHash+colors.Off, -1) 103 progressChannel <- &progress.ProgressMsg{ 104 Payload: &chunk, 105 Event: progress.Start, 106 Message: msg, 107 } 108 109 download, err := fetchFromIpfsGateway(workerArgs.ctx, workerArgs.gatewayUrl, hash.String()) 110 if errors.Is(workerArgs.ctx.Err(), context.Canceled) { 111 // The request to fetch the chunk was cancelled, because user has 112 // pressed Ctrl-C 113 return 114 } 115 116 if workerArgs.ctx.Err() != nil { 117 // User hit control + c - clean up both peices for the current chunk 118 chunkPath := filepath.Join(config.PathToIndex(chain), "finalized", chunk.Range+".bin") 119 removeLocalFile(ToIndexPath(chunkPath), "user canceled", progressChannel) 120 removeLocalFile(ToBloomPath(chunkPath), "user canceled", progressChannel) 121 progressChannel <- &progress.ProgressMsg{ 122 Payload: &chunk, 123 Event: progress.Error, 124 Error: fmt.Errorf("%w [%s]", ErrUserHitControlC, workerArgs.ctx.Err().Error()), 125 } 126 return 127 } 128 if err == nil { 129 workerArgs.writeChannel <- &jobResult{ 130 rng: chunk.Range, 131 fileSize: download.ContentLen, 132 contents: download.Body, 133 theChunk: &chunk, 134 } 135 } else { 136 progressChannel <- &progress.ProgressMsg{ 137 Payload: &chunk, 138 Event: progress.Error, 139 Error: fmt.Errorf("%w [%s]", ErrDownloadError, err.Error()), 140 } 141 } 142 } 143 } 144 } 145 } 146 147 // fetchResult type make it easier to return both download content and 148 // download size information (for validation purposes) 149 type fetchResult struct { 150 Body io.ReadCloser 151 ContentLen int64 // download size in bytes 152 } 153 154 // fetchFromIpfsGateway downloads a chunk from an IPFS gateway using HTTP 155 func fetchFromIpfsGateway(ctx context.Context, gateway, hash string) (*fetchResult, error) { 156 url, _ := url.Parse(gateway) 157 url.Path = path.Join(url.Path, hash) 158 159 debug.DebugCurlStr(url.String()) 160 request, err := http.NewRequestWithContext(ctx, "GET", url.String(), nil) 161 if err != nil { 162 return nil, fmt.Errorf("NewRequestWithContext %s returned error: %w", url, err) 163 } 164 165 response, err := http.DefaultClient.Do(request) 166 if err != nil { 167 return nil, fmt.Errorf("DefaultClient.Do %s returned error: %w", url, err) 168 } 169 170 if response.StatusCode != 200 { 171 return nil, fmt.Errorf("fetchFromIpfsGateway %s returned status code: %d", url, response.StatusCode) 172 } 173 174 contentLen := int64(0) 175 if len(response.Header.Get("Content-Length")) != 0 { 176 contentLen, err = strconv.ParseInt(response.Header.Get("Content-Length"), 10, 64) 177 if err != nil { 178 return nil, fmt.Errorf("response.Header.Get %s returned error: %w", url, err) 179 } 180 } 181 182 body := response.Body 183 return &fetchResult{ 184 Body: body, 185 ContentLen: contentLen, 186 }, nil 187 } 188 189 // getWriteWorker returns a worker function that writes chunk to disk 190 func getWriteWorker(chain string, workerArgs writeWorkerArguments, chunkType walk.CacheType) workerFunction { 191 progressChannel := workerArgs.progressChannel 192 193 return func(resParam interface{}) { 194 // Take download data from the channel and save it 195 res := resParam.(*jobResult) 196 197 defer workerArgs.writeWg.Done() 198 199 select { 200 case <-workerArgs.ctx.Done(): 201 return 202 default: 203 cleanOnQuit := func() { 204 logger.Warn(sigintTrap.TrapMessage) 205 } 206 trapChannel := sigintTrap.Enable(workerArgs.ctx, workerArgs.cancel, cleanOnQuit) 207 err := writeBytesToDisc(chain, chunkType, res) 208 sigintTrap.Disable(trapChannel) 209 if errors.Is(workerArgs.ctx.Err(), context.Canceled) { 210 // Ctrl-C was pressed, cancel 211 return 212 } 213 214 if err != nil { 215 progressChannel <- &progress.ProgressMsg{ 216 Payload: res.theChunk, 217 Event: progress.Error, 218 Error: fmt.Errorf("%w [%s]", ErrWriteToDiscError, err.Error()), 219 } 220 return 221 } 222 223 progressChannel <- &progress.ProgressMsg{ 224 Payload: res.theChunk, 225 Event: progress.Finished, 226 Message: chunkType.String(), 227 } 228 } 229 } 230 } 231 232 // DownloadChunks downloads, unzips and saves the chunk of type indicated by chunkType 233 // for each chunk in chunks. ProgressMsg is reported to progressChannel. 234 func DownloadChunks(chain string, chunksToDownload []types.ChunkRecord, chunkType walk.CacheType, poolSize int, progressChannel progressChan) { 235 // Context lets us handle Ctrl-C easily 236 ctx, cancel := context.WithCancel(context.Background()) 237 defer func() { 238 cancel() 239 }() 240 241 var downloadWg sync.WaitGroup 242 writeChannel := make(chan *jobResult, poolSize) 243 downloadWorkerArgs := downloadWorkerArguments{ 244 ctx: ctx, 245 progressChannel: progressChannel, 246 downloadWg: &downloadWg, 247 gatewayUrl: config.GetChain(chain).IpfsGateway, 248 writeChannel: writeChannel, 249 nRetries: 8, 250 } 251 downloadPool, err := ants.NewPoolWithFunc(poolSize, getDownloadWorker(chain, downloadWorkerArgs, chunkType)) 252 defer downloadPool.Release() 253 if err != nil { 254 panic(err) 255 } 256 257 var writeWg sync.WaitGroup 258 writeWorkerArgs := writeWorkerArguments{ 259 ctx: ctx, 260 progressChannel: progressChannel, 261 cancel: cancel, 262 writeWg: &writeWg, 263 } 264 writePool, err := ants.NewPoolWithFunc(poolSize, getWriteWorker(chain, writeWorkerArgs, chunkType)) 265 defer writePool.Release() 266 if err != nil { 267 panic(err) 268 } 269 270 // Closed in the go routine after we're finished writing or the user cancels 271 writeWg.Add(1) 272 go func() { 273 for result := range writeChannel { 274 if ctx.Err() != nil { 275 // The user hit Ctrl-C. It may have been disabled by sigintTrap, so we 276 // must drain the channel. Otherwise, it will deadlock 277 continue 278 } 279 280 // Closed inside the invocation 281 writeWg.Add(1) 282 _ = writePool.Invoke(result) 283 } 284 285 // Close the opening wg when all writes are finished or the user canceled 286 writeWg.Done() 287 }() 288 289 for _, chunk := range chunksToDownload { 290 downloadWg.Add(1) 291 _ = downloadPool.Invoke(chunk) 292 } 293 downloadWg.Wait() 294 295 close(writeChannel) 296 writeWg.Wait() 297 298 if errors.Is(ctx.Err(), context.Canceled) { 299 progressChannel <- &progress.ProgressMsg{ 300 Event: progress.Cancelled, 301 } 302 return 303 } 304 305 progressChannel <- &progress.ProgressMsg{ 306 Event: progress.AllDone, 307 } 308 } 309 310 // writeBytesToDisc save the downloaded bytes to disc 311 func writeBytesToDisc(chain string, chunkType walk.CacheType, res *jobResult) error { 312 fullPath := filepath.Join(config.PathToIndex(chain), "finalized", res.rng+".bin") 313 if chunkType == walk.Index_Bloom { 314 fullPath = ToBloomPath(fullPath) 315 } 316 outputFile, err := os.OpenFile(fullPath, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0666) 317 if err != nil { 318 return fmt.Errorf("error creating output file file %s in writeBytesToDisc: [%s]", res.rng, err) 319 } 320 321 // Save downloaded bytes to a file 322 _, err = io.Copy(outputFile, res.contents) 323 if err != nil { 324 if file.FileExists(outputFile.Name()) { 325 outputFile.Close() 326 os.Remove(outputFile.Name()) 327 col := colors.Magenta 328 if fullPath == ToIndexPath(fullPath) { 329 col = colors.Yellow 330 } 331 logger.Warn("Failed download", col, res.rng, colors.Off, "(will retry)", strings.Repeat(" ", 30)) 332 } 333 // Information about this error 334 // https://community.k6.io/t/warn-0040-request-failed-error-stream-error-stream-id-3-internal-error/777/2 335 return fmt.Errorf("error copying %s file in writeBytesToDisc: [%s]", res.rng, err) 336 } 337 338 outputFile.Close() 339 return nil 340 } 341 342 func removeLocalFile(fullPath, reason string, progressChannel progressChan) bool { 343 if file.FileExists(fullPath) { 344 err := os.Remove(fullPath) 345 if err != nil { 346 progressChannel <- &progress.ProgressMsg{ 347 Event: progress.Error, 348 Error: fmt.Errorf("%w %s [%s]", ErrFailedLocalFileRemoval, fullPath, err.Error()), 349 } 350 } else { 351 progressChannel <- &progress.ProgressMsg{ 352 Event: progress.Update, 353 Message: fmt.Sprintf("File %s removed [%s]", fullPath, reason), 354 } 355 } 356 } 357 return file.FileExists(fullPath) 358 }