github.com/TrueBlocks/trueblocks-core/src/apps/chifra@v0.0.0-20241022031540-b362680128f7/pkg/index/download_chunks.go (about)

     1  // Copyright 2021 The TrueBlocks Authors. All rights reserved.
     2  // Use of this source code is governed by a license that can
     3  // be found in the LICENSE file.
     4  
     5  package index
     6  
     7  // Fetching, unzipping, validating and saving both index and bloom chunks
     8  
     9  import (
    10  	"context"
    11  	"errors"
    12  	"fmt"
    13  	"io"
    14  	"net/http"
    15  	"net/url"
    16  	"os"
    17  	"path"
    18  	"path/filepath"
    19  	"strconv"
    20  	"strings"
    21  	"sync"
    22  
    23  	"github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/colors"
    24  	"github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/config"
    25  	"github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/debug"
    26  	"github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/file"
    27  	"github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/logger"
    28  	"github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/progress"
    29  	"github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/sigintTrap"
    30  	"github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/types"
    31  	"github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/utils"
    32  	"github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/walk"
    33  	ants "github.com/panjf2000/ants/v2"
    34  )
    35  
// jobResult carries one downloaded chunk, plus the metadata needed to store
// it, from a download worker to a write worker through a channel.
type jobResult struct {
	// rng is the chunk's range string; writeBytesToDisc uses it as the base
	// name of the output file.
	rng string
	// fileSize is the download size in bytes as reported by the gateway's
	// Content-Length header (0 when the header was absent).
	fileSize int64
	// contents is the stream of downloaded bytes. The download worker fills
	// it with the HTTP response body.
	contents io.Reader
	// theChunk is the chunk record this download belongs to; it is used as
	// the payload of progress messages.
	theChunk *types.ChunkRecord
}
    44  
// progressChan is the send-only channel on which this file reports progress
// events (start, update, error, finished, cancelled, all done).
type progressChan chan<- *progress.ProgressMsg
    46  
    47  // Types of errors put into the progressChannel
    48  
    49  var ErrFailedLocalFileRemoval = errors.New("failed to remove local file")
    50  var ErrUserHitControlC = errors.New("user hit control + c")
    51  var ErrDownloadError = errors.New("download error")
    52  var ErrWriteToDiscError = errors.New("write to disc error")
    53  
// downloadWorkerArguments holds the arguments of the download worker. We
// cannot pass the arguments directly, because a worker function is expected
// to take one parameter of type interface{}.
type downloadWorkerArguments struct {
	// ctx is checked before and after each download so the worker can stop
	// once it has been canceled.
	ctx context.Context
	// progressChannel receives start and error events.
	progressChannel progressChan
	// gatewayUrl is the base URL of the IPFS gateway; the chunk's hash is
	// appended to its path.
	gatewayUrl string
	// downloadWg is marked done once per worker invocation.
	downloadWg *sync.WaitGroup
	// writeChannel receives each successful download for writing to disc.
	writeChannel chan *jobResult
	// nRetries is set by DownloadChunks but is not read by any code in this
	// file. NOTE(review): confirm whether it is still needed.
	nRetries int
}
    65  
// writeWorkerArguments holds the arguments of the write worker, bundled into
// one value for the same reason as downloadWorkerArguments.
type writeWorkerArguments struct {
	// ctx is checked before and after each write so the worker can stop once
	// it has been canceled.
	ctx context.Context
	// progressChannel receives error and finished events.
	progressChannel progressChan
	// cancel is handed to sigintTrap.Enable together with ctx while a write
	// is in progress.
	cancel context.CancelFunc
	// writeWg is marked done once per worker invocation.
	writeWg *sync.WaitGroup
}
    72  
// workerFunction is the worker function type as accepted by Ants: it takes a
// single untyped argument, which each worker asserts back to its real type.
type workerFunction func(interface{})
    75  
    76  // getDownloadWorker returns a worker function that downloads a chunk
    77  func getDownloadWorker(chain string, workerArgs downloadWorkerArguments, chunkType walk.CacheType) workerFunction {
    78  	progressChannel := workerArgs.progressChannel
    79  
    80  	return func(param interface{}) {
    81  		chunk := param.(types.ChunkRecord)
    82  
    83  		defer workerArgs.downloadWg.Done()
    84  
    85  		select {
    86  		case <-workerArgs.ctx.Done():
    87  			// Cancel
    88  			return
    89  
    90  		default:
    91  			hash := chunk.BloomHash
    92  			if chunkType == walk.Index_Final {
    93  				hash = chunk.IndexHash
    94  			}
    95  			if hash != "" {
    96  
    97  				// TODO: Do we really need the colored display?
    98  				bHash := utils.FormattedHash(false, chunk.BloomHash.String())
    99  				iHash := utils.FormattedHash(false, chunk.IndexHash.String())
   100  				tHash := utils.FormattedHash(false, hash.String())
   101  				msg := fmt.Sprintf("%s %s %s", chunk.Range, bHash, iHash)
   102  				msg = strings.Replace(msg, tHash, colors.BrightCyan+tHash+colors.Off, -1)
   103  				progressChannel <- &progress.ProgressMsg{
   104  					Payload: &chunk,
   105  					Event:   progress.Start,
   106  					Message: msg,
   107  				}
   108  
   109  				download, err := fetchFromIpfsGateway(workerArgs.ctx, workerArgs.gatewayUrl, hash.String())
   110  				if errors.Is(workerArgs.ctx.Err(), context.Canceled) {
   111  					// The request to fetch the chunk was cancelled, because user has
   112  					// pressed Ctrl-C
   113  					return
   114  				}
   115  
   116  				if workerArgs.ctx.Err() != nil {
   117  					// User hit control + c - clean up both peices for the current chunk
   118  					chunkPath := filepath.Join(config.PathToIndex(chain), "finalized", chunk.Range+".bin")
   119  					removeLocalFile(ToIndexPath(chunkPath), "user canceled", progressChannel)
   120  					removeLocalFile(ToBloomPath(chunkPath), "user canceled", progressChannel)
   121  					progressChannel <- &progress.ProgressMsg{
   122  						Payload: &chunk,
   123  						Event:   progress.Error,
   124  						Error:   fmt.Errorf("%w [%s]", ErrUserHitControlC, workerArgs.ctx.Err().Error()),
   125  					}
   126  					return
   127  				}
   128  				if err == nil {
   129  					workerArgs.writeChannel <- &jobResult{
   130  						rng:      chunk.Range,
   131  						fileSize: download.ContentLen,
   132  						contents: download.Body,
   133  						theChunk: &chunk,
   134  					}
   135  				} else {
   136  					progressChannel <- &progress.ProgressMsg{
   137  						Payload: &chunk,
   138  						Event:   progress.Error,
   139  						Error:   fmt.Errorf("%w [%s]", ErrDownloadError, err.Error()),
   140  					}
   141  				}
   142  			}
   143  		}
   144  	}
   145  }
   146  
// fetchResult makes it easier to return both the downloaded content and
// the download size information (for validation purposes).
type fetchResult struct {
	// Body is the HTTP response body; it stays open when returned, so the
	// receiver is responsible for closing it.
	Body io.ReadCloser
	// ContentLen is the download size in bytes, taken from the Content-Length
	// header; it is 0 when the header is absent.
	ContentLen int64 // download size in bytes
}
   153  
   154  // fetchFromIpfsGateway downloads a chunk from an IPFS gateway using HTTP
   155  func fetchFromIpfsGateway(ctx context.Context, gateway, hash string) (*fetchResult, error) {
   156  	url, _ := url.Parse(gateway)
   157  	url.Path = path.Join(url.Path, hash)
   158  
   159  	debug.DebugCurlStr(url.String())
   160  	request, err := http.NewRequestWithContext(ctx, "GET", url.String(), nil)
   161  	if err != nil {
   162  		return nil, fmt.Errorf("NewRequestWithContext %s returned error: %w", url, err)
   163  	}
   164  
   165  	response, err := http.DefaultClient.Do(request)
   166  	if err != nil {
   167  		return nil, fmt.Errorf("DefaultClient.Do %s returned error: %w", url, err)
   168  	}
   169  
   170  	if response.StatusCode != 200 {
   171  		return nil, fmt.Errorf("fetchFromIpfsGateway %s returned status code: %d", url, response.StatusCode)
   172  	}
   173  
   174  	contentLen := int64(0)
   175  	if len(response.Header.Get("Content-Length")) != 0 {
   176  		contentLen, err = strconv.ParseInt(response.Header.Get("Content-Length"), 10, 64)
   177  		if err != nil {
   178  			return nil, fmt.Errorf("response.Header.Get %s returned error: %w", url, err)
   179  		}
   180  	}
   181  
   182  	body := response.Body
   183  	return &fetchResult{
   184  		Body:       body,
   185  		ContentLen: contentLen,
   186  	}, nil
   187  }
   188  
   189  // getWriteWorker returns a worker function that writes chunk to disk
   190  func getWriteWorker(chain string, workerArgs writeWorkerArguments, chunkType walk.CacheType) workerFunction {
   191  	progressChannel := workerArgs.progressChannel
   192  
   193  	return func(resParam interface{}) {
   194  		// Take download data from the channel and save it
   195  		res := resParam.(*jobResult)
   196  
   197  		defer workerArgs.writeWg.Done()
   198  
   199  		select {
   200  		case <-workerArgs.ctx.Done():
   201  			return
   202  		default:
   203  			cleanOnQuit := func() {
   204  				logger.Warn(sigintTrap.TrapMessage)
   205  			}
   206  			trapChannel := sigintTrap.Enable(workerArgs.ctx, workerArgs.cancel, cleanOnQuit)
   207  			err := writeBytesToDisc(chain, chunkType, res)
   208  			sigintTrap.Disable(trapChannel)
   209  			if errors.Is(workerArgs.ctx.Err(), context.Canceled) {
   210  				// Ctrl-C was pressed, cancel
   211  				return
   212  			}
   213  
   214  			if err != nil {
   215  				progressChannel <- &progress.ProgressMsg{
   216  					Payload: res.theChunk,
   217  					Event:   progress.Error,
   218  					Error:   fmt.Errorf("%w [%s]", ErrWriteToDiscError, err.Error()),
   219  				}
   220  				return
   221  			}
   222  
   223  			progressChannel <- &progress.ProgressMsg{
   224  				Payload: res.theChunk,
   225  				Event:   progress.Finished,
   226  				Message: chunkType.String(),
   227  			}
   228  		}
   229  	}
   230  }
   231  
   232  // DownloadChunks downloads, unzips and saves the chunk of type indicated by chunkType
   233  // for each chunk in chunks. ProgressMsg is reported to progressChannel.
   234  func DownloadChunks(chain string, chunksToDownload []types.ChunkRecord, chunkType walk.CacheType, poolSize int, progressChannel progressChan) {
   235  	// Context lets us handle Ctrl-C easily
   236  	ctx, cancel := context.WithCancel(context.Background())
   237  	defer func() {
   238  		cancel()
   239  	}()
   240  
   241  	var downloadWg sync.WaitGroup
   242  	writeChannel := make(chan *jobResult, poolSize)
   243  	downloadWorkerArgs := downloadWorkerArguments{
   244  		ctx:             ctx,
   245  		progressChannel: progressChannel,
   246  		downloadWg:      &downloadWg,
   247  		gatewayUrl:      config.GetChain(chain).IpfsGateway,
   248  		writeChannel:    writeChannel,
   249  		nRetries:        8,
   250  	}
   251  	downloadPool, err := ants.NewPoolWithFunc(poolSize, getDownloadWorker(chain, downloadWorkerArgs, chunkType))
   252  	defer downloadPool.Release()
   253  	if err != nil {
   254  		panic(err)
   255  	}
   256  
   257  	var writeWg sync.WaitGroup
   258  	writeWorkerArgs := writeWorkerArguments{
   259  		ctx:             ctx,
   260  		progressChannel: progressChannel,
   261  		cancel:          cancel,
   262  		writeWg:         &writeWg,
   263  	}
   264  	writePool, err := ants.NewPoolWithFunc(poolSize, getWriteWorker(chain, writeWorkerArgs, chunkType))
   265  	defer writePool.Release()
   266  	if err != nil {
   267  		panic(err)
   268  	}
   269  
   270  	// Closed in the go routine after we're finished writing or the user cancels
   271  	writeWg.Add(1)
   272  	go func() {
   273  		for result := range writeChannel {
   274  			if ctx.Err() != nil {
   275  				// The user hit Ctrl-C. It may have been disabled by sigintTrap, so we
   276  				// must drain the channel. Otherwise, it will deadlock
   277  				continue
   278  			}
   279  
   280  			// Closed inside the invocation
   281  			writeWg.Add(1)
   282  			_ = writePool.Invoke(result)
   283  		}
   284  
   285  		// Close the opening wg when all writes are finished or the user canceled
   286  		writeWg.Done()
   287  	}()
   288  
   289  	for _, chunk := range chunksToDownload {
   290  		downloadWg.Add(1)
   291  		_ = downloadPool.Invoke(chunk)
   292  	}
   293  	downloadWg.Wait()
   294  
   295  	close(writeChannel)
   296  	writeWg.Wait()
   297  
   298  	if errors.Is(ctx.Err(), context.Canceled) {
   299  		progressChannel <- &progress.ProgressMsg{
   300  			Event: progress.Cancelled,
   301  		}
   302  		return
   303  	}
   304  
   305  	progressChannel <- &progress.ProgressMsg{
   306  		Event: progress.AllDone,
   307  	}
   308  }
   309  
   310  // writeBytesToDisc save the downloaded bytes to disc
   311  func writeBytesToDisc(chain string, chunkType walk.CacheType, res *jobResult) error {
   312  	fullPath := filepath.Join(config.PathToIndex(chain), "finalized", res.rng+".bin")
   313  	if chunkType == walk.Index_Bloom {
   314  		fullPath = ToBloomPath(fullPath)
   315  	}
   316  	outputFile, err := os.OpenFile(fullPath, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0666)
   317  	if err != nil {
   318  		return fmt.Errorf("error creating output file file %s in writeBytesToDisc: [%s]", res.rng, err)
   319  	}
   320  
   321  	// Save downloaded bytes to a file
   322  	_, err = io.Copy(outputFile, res.contents)
   323  	if err != nil {
   324  		if file.FileExists(outputFile.Name()) {
   325  			outputFile.Close()
   326  			os.Remove(outputFile.Name())
   327  			col := colors.Magenta
   328  			if fullPath == ToIndexPath(fullPath) {
   329  				col = colors.Yellow
   330  			}
   331  			logger.Warn("Failed download", col, res.rng, colors.Off, "(will retry)", strings.Repeat(" ", 30))
   332  		}
   333  		// Information about this error
   334  		// https://community.k6.io/t/warn-0040-request-failed-error-stream-error-stream-id-3-internal-error/777/2
   335  		return fmt.Errorf("error copying %s file in writeBytesToDisc: [%s]", res.rng, err)
   336  	}
   337  
   338  	outputFile.Close()
   339  	return nil
   340  }
   341  
   342  func removeLocalFile(fullPath, reason string, progressChannel progressChan) bool {
   343  	if file.FileExists(fullPath) {
   344  		err := os.Remove(fullPath)
   345  		if err != nil {
   346  			progressChannel <- &progress.ProgressMsg{
   347  				Event: progress.Error,
   348  				Error: fmt.Errorf("%w %s [%s]", ErrFailedLocalFileRemoval, fullPath, err.Error()),
   349  			}
   350  		} else {
   351  			progressChannel <- &progress.ProgressMsg{
   352  				Event:   progress.Update,
   353  				Message: fmt.Sprintf("File %s removed [%s]", fullPath, reason),
   354  			}
   355  		}
   356  	}
   357  	return file.FileExists(fullPath)
   358  }